Bootstrap

Python 批量操作 数据库的记录

#  1. ES批量操作

from elasticsearch5 import Elasticsearch, helpers
# Client for a single Elasticsearch node; the host entry also carries a
# "timeout" setting of 1500.
es = Elasticsearch(
    [
        {
            "host": ELASTIC_SEARCH_IP,
            "port": 9200,
            "timeout": 1500,
        },
    ]
)

## Bulk query
# Run the query `body` against index `idx` through helpers.scan.
es_result = helpers.scan(
    es,
    index=idx,
    query=body,
    # '5m' means 5 minutes. Unit suffixes listed by the author: y = year,
    # M = month, w = week, d = day, h = hour, m = minute, s = second.
    # NOTE(review): confirm which suffixes the target ES version accepts.
    scroll='5m',
    timeout="5m",  # timeout
)

## 批量插入

# One bulk action: target index / type / id plus the document payload.
# Use "doc" for a partial-field update and "_source" for a full-document write.
action = dict(
    _index="idx",
    _type="news",
    _id=new_id,
    _source=line["_source"],
)
actions.append(action)

# Submit every collected action through helpers.bulk.
result_es = helpers.bulk(
    es,
    actions,
    stats_only=True,
    raise_on_error=False,
    refresh=True,
)

## 批量修改
1.相同数据的统一修改
# Update-by-query payload: select every document whose `write_date` lies
# between 2019-01-15 12:30:17 and now, and set its `index` field to the
# script parameter (211).
updateBody = {
    "query": {
        "range": {
            "write_date": {
                "gte": "2019-01-15 12:30:17",
                "lte": "now",
            }
        }
    },
    "script": {
        # NOTE(review): newer Elasticsearch releases appear to name this key
        # "source"; "inline" is kept because this file targets the 5.x client.
        "inline": "ctx._source.index = params.index",
        "params": {
            "index": 211,
        },
        "lang": "painless",
    },
}
# Use the `es` client created at the top of this file; the original called an
# undefined `es_client`.
es.update_by_query(index="log_index", doc_type='log_index', body=updateBody)

## Layout of a bulk body in Python: except for delete, every operation is a
## pair of entries. The first entry names the action and its target, the
## second carries the data to apply.

# Partial updates: each {'update': ...} header is followed by its {'doc': ...}.
doc = [
    {'update': {'_index': 'ecommerce', '_id': '11'}},
    {'doc': {'price': 1314}},
    {'update': {'_index': 'ecommerce', '_id': '1'}},
    {'doc': {'price': 7758}},
]
# Send the update payload now: `doc` is rebound to a different payload below,
# so issuing this call later would never submit these updates.
result = es.bulk(body=doc, index="ecommerce")

# Index (insert) actions: each {"index": {}} header is followed by the document.
doc = [
    {"index": {}},  # the index may be omitted here and given once in the bulk() call
    {'name': 'jackaaa', 'age': 2000, 'sex': 'female', 'address': u'北京'},
    {"index": {}},
    {'name': 'jackbbb', 'age': 3000, 'sex': 'male', 'address': u'上海'},
    {"index": {}},
    {'name': 'jackccc', 'age': 4000, 'sex': 'female', 'address': u'广州'},
    {"index": {}},
    {'name': 'jackddd', 'age': 1000, 'sex': 'male', 'address': u'深圳'},
]
# Index and type are supplied once here because the action headers are empty.
dd = es.bulk(index='indexName', doc_type='typeName', body=doc)


代价较小的批量操作
python3 更新和删除 elasticsearch 及其删除es索引
es 批量操作详细讲
如何使用python对ES进行批量更新操作

## mongo批量操作
import pymongo
from pymongo import UpdateOne, InsertOne
# Create the client from the configured host/port (connect=False is passed
# through unchanged).
mongo_client = pymongo.MongoClient(host=MONGODB_IP, port=MONGODB_PORT, connect=False)
# NOTE(review): the original read `mngo.collect` without ever defining `mngo`.
# MONGODB_DB is a placeholder for the real database name -- confirm it.
mngo = mongo_client[MONGODB_DB]
coll = mngo.collect

# Collect the write operations first, then submit them in a single bulk_write.
# `k` is the document id and `data` the per-field increments for $inc.
operations = [
    UpdateOne({"_id": k}, {"$inc": data}),
    InsertOne({"_id": k}),
]
result = coll.bulk_write(operations)
# MYSQL 批量更新
import pymysql
# Open a connection using the settings stored on `self` (this snippet
# presumably lives inside a method of a client class).
_conn = pymysql.connect(
    host=self.host, user=self.user,
    passwd=self.pwd, db=self.db, charset=self.charset)
_cursor = _conn.cursor()

# One parameterised INSERT, executed once per tuple in data_list.
sql_insert = "INSERT INTO `test_news`.`test_news_detail` (`nid`, `title`, `author`, `content`, `pub_time`) values (%s, %s, %s, %s, %s)"
data_list = [('1','标题','','',''), ('1','标题','','',''),('1','标题','','','')]
try:
    _cursor.executemany(sql_insert, data_list)
    # PyMySQL leaves autocommit off by default, so without an explicit commit
    # the inserted rows are never persisted.
    _conn.commit()
except Exception:
    # Undo the partial batch, then let the caller see the original error.
    _conn.rollback()
    raise

# 一些有意思的sql 
1.匹配字符串模式
# Sum `duration` per `cuid`, keeping only rows whose cuid matches the REGEXP
# pattern, with duration > 0 and action_type 31 or 32.
# NOTE(review): no table name appears between FROM and WHERE in this
# statement -- confirm the intended table before running it.
today_user_read = (
    "SELECT cuid, sum(duration) as read_time FROM   WHERE  "
    "`cuid` REGEXP '^[1][35678][0-9]{9}$' "
    "and duration > 0 and action_type in (31, 32) "
    "group by cuid"
)
cuid_read_time = PyMysqlClient(
    nomarl_class=False,  # pass False for this option
).query(today_user_read)

2.根据表里的数据增加  
可以在UPDATE子句中使用VALUES(col_name)函数,从INSERT ... ON DUPLICATE KEY UPDATE语句的INSERT部分引用列值。
`换句话说,如果没有发生重复关键字冲突,则UPDATE子句中的VALUES(col_name)可以引用被插入的col_name的值。本函数特别适用于多行插入。VALUES()函数只在INSERT ... ON DUPLICATE KEY UPDATE语句中有意义,其它时候会返回NULL。`

# Upsert into tb_fiction_gold: on a duplicate key, gold_num is increased by
# the incoming value and reading_time is overwritten with the incoming value.
tb_fiction = (
    "insert into tb_fiction_gold (mobile,gold_num, reading_time) "
    "values (%s, %s, %s) "
    "on duplicate key update "
    "gold_num=values(gold_num)+gold_num, "
    "reading_time=values(reading_time)"
)
qiyu_mp_client = PyMysqlClient()
# list_data supplies one (mobile, gold_num, reading_time) parameter set per row.
qiyu_mp_client.query_many(tb_fiction, list_data)

# Second upsert statement: insert a read record with read_num = 1; on a
# duplicate key, take the incoming chapter_id and increment read_num by one.
# The original string literal was never closed, which is a syntax error.
# NOTE(review): the quoted '%s' placeholders suggest the values are filled in
# with Python string formatting -- prefer driver-side parameters for any
# untrusted input.
sql = (
    "insert into tb_user_read (`novel_id`, `user_id`, `chapter_id`, `app_id`, `read_num`) "
    "value ('%s', '%s', '%s', '%s',1) "
    "on duplicate key update `chapter_id`=values(`chapter_id`),  `read_num`=1+`read_num`"
)

3.替换语句
# REPLACE INTO template for global_statistic with three quoted placeholders
# (id, read_num, star_num).
sql_ = (
    "replace into global_statistic "
    "(`id`, `read_num`, `star_num`) "
    "values ('%s', '%s', '%s')"
)

4.巧妙的拼接一个通用的更新语句
# Build a generic "insert, or update on duplicate key" statement from a
# column -> value mapping. The result holds two quoted "%s" placeholders per
# column: one in the VALUES list and one in the UPDATE clause.
tmp_body = {"k1": "v1", "k2": "v2", "k3": "v3", "k4": "v4"}
table = 'tb_app_table'

# Back-quoted, comma-separated column list.
keys = '`{}`'.format('`, `'.join(tmp_body))
# One quoted placeholder per column.
values = ', '.join('"%s"' for _ in tmp_body)
# One ` `col` = "%s"` assignment per column for the UPDATE clause.
update = ','.join(' `{}` = "%s"'.format(column) for column in tmp_body)

sql = (
    'INSERT INTO ' + table + '(' + keys + ') VALUES (' + values + ')'
    ' ON DUPLICATE KEY UPDATE ' + update
)

;