# 1. Elasticsearch bulk operations
from elasticsearch5 import Elasticsearch, helpers
es = Elasticsearch([{"host": ELASTIC_SEARCH_IP, "port": 9200, "timeout": 1500}])
## Bulk query (helpers.scan)
es_result = helpers.scan(
    client=es,
    query=body,
    scroll='5m',    # how long the scroll context stays alive; units: y=year, M=month, w=week, d=day, h=hour, m=minute, s=second
    index=idx,
    timeout="5m"    # per-request timeout
)
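helpers.scan returns a generator and only pulls pages from Elasticsearch as you iterate. A minimal, self-contained sketch of consuming it (the query body and index name below are made up for illustration):
body = {"query": {"match_all": {}}}      # illustrative query
idx = "news_index"                       # illustrative index name
docs = []
for hit in helpers.scan(client=es, query=body, scroll='5m', index=idx):
    docs.append(hit["_source"])          # each hit is a regular search hit dict
print("fetched %d documents" % len(docs))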
## Bulk insert (helpers.bulk)
actions = []
action = {
    "_index": "idx",
    "_type": "news",
    "_id": new_id,                 # new_id and line come from the surrounding loop over the source data
    "_source": line["_source"]     # use "doc" for partial-field updates, "_source" for full-document writes
}
actions.append(action)
result_es = helpers.bulk(es, actions, stats_only=True, raise_on_error=False, refresh=True)
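As the comment above says, partial updates go through "doc" instead of "_source". A hedged sketch of a bulk partial update via helpers.bulk (the index, type and id values are made up):
update_actions = [
    {
        "_op_type": "update",            # default _op_type is "index"; "update" applies a partial update
        "_index": "idx",
        "_type": "news",
        "_id": "some_doc_id",            # placeholder id
        "doc": {"title": "new title"}    # only the listed fields are changed
    }
]
ok_count, errors = helpers.bulk(es, update_actions, raise_on_error=False)   # default stats_only=False returns (count, error list)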
## Bulk update
1. Applying the same change to all matching documents (update_by_query)
updateBody = {
    "query": {
        "range": {
            "write_date": {
                "gte": "2019-01-15 12:30:17",
                "lte": "now"
            }
        }
    },
    "script": {
        "inline": "ctx._source.index = params.index",
        "params": {
            "index": 211
        },
        "lang": "painless"
    }
}
es.update_by_query(index="log_index", doc_type="log_index", body=updateBody)
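update_by_query returns a summary dict; a hedged sketch of capturing and checking it (field names follow the standard Elasticsearch response):
resp = es.update_by_query(index="log_index", doc_type="log_index", body=updateBody)
print("matched %s docs, updated %s" % (resp.get("total"), resp.get("updated")))
if resp.get("failures"):                  # version conflicts and script errors end up here
    print("failures:", resp["failures"])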
## Bulk body format in Python: except for delete, every operation takes two entries —
## the first entry identifies the action and the target document, the second is the payload describing what it should become.
doc = [
    {'update': {'_index': 'ecommerce', '_id': '11'}},
    {'doc': {'price': 1314}},
    {'update': {'_index': 'ecommerce', '_id': '1'}},
    {'doc': {'price': 7758}},
]
doc = [
    {"index": {}},   # the action metadata can be left empty and a single index given to bulk() instead
    {'name': 'jackaaa', 'age': 2000, 'sex': 'female', 'address': u'北京'},
    {"index": {}},
    {'name': 'jackbbb', 'age': 3000, 'sex': 'male', 'address': u'上海'},
    {"index": {}},
    {'name': 'jackccc', 'age': 4000, 'sex': 'female', 'address': u'广州'},
    {"index": {}},
    {'name': 'jackddd', 'age': 1000, 'sex': 'male', 'address': u'深圳'},
]
dd = es.bulk(index='indexName', doc_type='typeName', body=doc)   # the index/doc_type passed here apply to every action whose metadata is empty
result = es.bulk(body=doc, index="ecommerce")
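As noted above, delete is the exception: it has no payload, so it occupies a single entry. A small sketch mixing a delete and an update in one bulk body (the ids are illustrative):
doc = [
    {'delete': {'_index': 'ecommerce', '_id': '11'}},   # delete has no second entry
    {'update': {'_index': 'ecommerce', '_id': '1'}},
    {'doc': {'price': 999}},
]
result = es.bulk(body=doc)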
# 2. MongoDB bulk operations
import pymongo
from pymongo import UpdateOne, InsertOne

mongo_client = pymongo.MongoClient(host=MONGODB_IP, port=MONGODB_PORT, connect=False)
coll = mongo_client["db_name"]["collection_name"]   # database / collection names are placeholders
operations = []
operations.append(UpdateOne({"_id": k}, {"$inc": data}))   # k and data come from the surrounding loop
operations.append(InsertOne({"_id": k}))
result = coll.bulk_write(operations)
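bulk_write runs the operations in order and stops at the first error by default; ordered=False lets the remaining operations continue. A hedged sketch of checking the returned BulkWriteResult:
result = coll.bulk_write(operations, ordered=False)   # ordered=False: keep going past individual failures
print("inserted:", result.inserted_count)
print("matched:", result.matched_count)
print("modified:", result.modified_count)
print("upserted:", result.upserted_count)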
# 3. MySQL bulk insert / update
import pymysql

# host / user / password etc. come from the surrounding wrapper class (self.*)
_conn = pymysql.connect(
    host=self.host, user=self.user,
    passwd=self.pwd, db=self.db, charset=self.charset)
_cursor = _conn.cursor()
sql_insert = "INSERT INTO `test_news`.`test_news_detail` (`nid`, `title`, `author`, `content`, `pub_time`) values (%s, %s, %s, %s, %s)"
data_list = [('1', '标题', '', '', ''), ('1', '标题', '', '', ''), ('1', '标题', '', '', '')]
_cursor.executemany(sql_insert, data_list)
_conn.commit()   # pymysql does not autocommit by default
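executemany works the same way for UPDATE statements, which is the actual "bulk update" case this section is named after; a hedged sketch reusing the table from the insert example:
sql_update = "UPDATE `test_news`.`test_news_detail` SET `title` = %s WHERE `nid` = %s"
update_list = [('new title 1', '1'), ('new title 2', '2')]   # illustrative data
_cursor.executemany(sql_update, update_list)
_conn.commit()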
# 4. Some interesting SQL
1. Matching a string pattern with REGEXP
# NOTE: the original query omitted the table name after FROM; "user_action_log" below is only a placeholder
today_user_read = "SELECT cuid, SUM(duration) AS read_time FROM user_action_log WHERE `cuid` REGEXP '^[1][35678][0-9]{9}$' AND duration > 0 AND action_type IN (31, 32) GROUP BY cuid"
cuid_read_time = PyMysqlClient(normal_class=False).query(today_user_read)   # remember to pass this False flag (option of the author's PyMysqlClient wrapper)
2. Incrementing a column based on the inserted values (INSERT ... ON DUPLICATE KEY UPDATE)
In the UPDATE clause you can use the VALUES(col_name) function to reference the column value from the INSERT part of an INSERT ... ON DUPLICATE KEY UPDATE statement.
In other words, VALUES(col_name) in the UPDATE clause refers to the value of col_name that would have been inserted had no duplicate-key conflict occurred. This is particularly useful for multi-row inserts; VALUES() is only meaningful inside INSERT ... ON DUPLICATE KEY UPDATE and returns NULL elsewhere.
tb_fiction = "insert into tb_fiction_gold (mobile,gold_num, reading_time) values (%s, %s, %s) on duplicate key update gold_num=values(gold_num)+gold_num, reading_time=values(reading_time)"
qiyu_mp_client = PyMysqlClient()
qiyu_mp_client.query_many(tb_fiction, list_data)
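list_data is not defined above; purely for illustration, the call could be fed rows whose shape matches the three placeholders (mobile, gold_num, reading_time):
list_data = [
    ('13800000000', 10, 600),    # mobile, gold_num, reading_time (made-up values)
    ('13900000000', 5, 300),
]
qiyu_mp_client.query_many(tb_fiction, list_data)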
# A second update statement
sql = "insert into tb_user_read (`novel_id`, `user_id`, `chapter_id`, `app_id`, `read_num`) values ('%s', '%s', '%s', '%s', 1) on duplicate key update `chapter_id`=values(`chapter_id`), `read_num`=1+`read_num`"
3. REPLACE statement
sql_ = "replace into global_statistic (`id`, `read_num`, `star_num`) values ('%s', '%s', '%s')"
4. Cleverly assembling a generic upsert statement
tmp_body = {"k1":"v1","k2":"v2","k3":"v3","k4":"v4"}
table = 'tb_app_table'
keys = '`' + '`, `'.join(tmp_body.keys()) + '`'
values = ', '.join(['"%s"'] * len(tmp_body))
sql = 'INSERT INTO {table}({keys}) VALUES ({values}) ON DUPLICATE KEY UPDATE '.format(table=table, keys=keys, values=values)
update = ','.join([' `{key}` = "%s"'.format(key=key) for key in tmp_body])
sql += update
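The assembled string ends up with two sets of quoted "%s" placeholders (one for the INSERT values, one for the UPDATE clause), so plain Python %-formatting with the dict values repeated twice fills it in. A hedged usage sketch, assuming the same PyMysqlClient wrapper's query() method used in the earlier examples:
params = tuple(tmp_body.values()) * 2    # same values for the INSERT part and the UPDATE part
final_sql = sql % params                 # plain string formatting; the placeholders are already quoted in the template
# note: %-formatting does not escape values, so only use it with trusted data
PyMysqlClient().query(final_sql)         # assumed wrapper call, mirroring the usage above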