Using a scroll_id cursor in Python to page through an Elasticsearch query and write all results to a txt file
#download all hits from an ES index
#use the scroll_id cursor to page through every result
from elasticsearch import Elasticsearch

es = Elasticsearch(["localhost:9200"])

body = {
    "_source": ["fileName", "fullPath", "HashFeature"],
    #"_source": ["fileName"],
    #"_source": ["fullPath"],
    #"_source": ["HashFeature"],
    "query": {
        "match_all": {}
    }
}

def get_search_result(es, index, doc_type, scroll='5m', timeout='1m', size=1000, body=body):
    # the initial search opens the scroll context and returns the first page
    queryData = es.search(
        index=index,
        doc_type=doc_type,
        scroll=scroll,
        timeout=timeout,
        size=size,
        body=body
    )
    mdata = queryData["hits"]["hits"]
    if not mdata:
        print('empty')
    scroll_id = queryData["_scroll_id"]
    # in elasticsearch-py 5.x/6.x hits.total is a plain integer
    total = queryData["hits"]["total"]
    # fetch the remaining pages with the scroll cursor
    for i in range(int(total / size)):
        res = es.scroll(scroll_id=scroll_id, scroll=scroll)
        scroll_id = res["_scroll_id"]
        mdata = mdata + res["hits"]["hits"]
    return mdata

if __name__ == "__main__":
    result = get_search_result(es, 'dh02_20180227', 'cc06_design')
    with open('G:/lab614/12000.txt', 'w') as f:
        for item in result:
            src = item['_source']
            # one line per document: fullPath,HashFeature,fileName
            f.write(src['fullPath'] + ',' + src['HashFeature'] + ',' + src['fileName'])
            f.write('\n')
    #print(result)
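
As an alternative to managing the scroll_id by hand, the elasticsearch-py client ships an elasticsearch.helpers.scan generator that drives the scroll cursor and clears the scroll context for you. Below is a minimal sketch, not the original author's code; it assumes the same index name, doc_type, query and output path as the script above.

# Alternative: let elasticsearch.helpers.scan drive the scroll cursor.
# Index name, doc_type and output path are reused from the script above (assumptions).
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan

es = Elasticsearch(["localhost:9200"])

query = {
    "_source": ["fileName", "fullPath", "HashFeature"],
    "query": {"match_all": {}}
}

with open('G:/lab614/12000.txt', 'w') as f:
    # scan() yields hits one by one and closes the scroll context when exhausted
    for hit in scan(es, query=query, index='dh02_20180227',
                    doc_type='cc06_design', scroll='5m', size=1000):
        src = hit['_source']
        f.write(src['fullPath'] + ',' + src['HashFeature'] + ',' + src['fileName'] + '\n')

Because scan() streams hits instead of accumulating the whole result set in a list, it avoids holding every document in memory at once.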