通过 Java 代码修改 ES 的索引配置:当数据量超过配置的最大返回记录数(index.max_result_window,默认 10000)时,调大该配置项。经修改后测试,14 万条数据可以正常查询,并能正常翻页,查询效率也基本未受影响。此前通过游标(scroll)的方式进行大数据量查询,虽然不受最大返回记录数配置的限制,但数据量超过 10 万条时,翻页越往后越慢。
理论上可以通过此方式查询出全量数据,除非数据量太大,导致内存溢出(OOM)。
/** * 更新es查询的最大返回记录数 * * @param indexName index * @param maxResultNum 最大返回记录数 */ public static void updateMaxResultSetting(String indexName, long maxResultNum) { log.info("[updateSetting] 准备更新es查询最大返回记录数, MaxResultNum:" + maxResultNum); Client client = ElasticSearchManager.getClient(); UpdateSettingsRequest request = new UpdateSettingsRequest(indexName); String settingKey = "index.max_result_window"; Settings settings = Settings.builder().put(settingKey, maxResultNum).build(); request.settings(settings); request.indicesOptions(IndicesOptions.lenientExpandOpen()); client.admin().indices().updateSettings(request); /* * RefreshRequest refreshRequest = new RefreshRequest(indexName); * client.admin().indices().refresh(refreshRequest); */ log.info("[updateSetting] 更新es查询最大返回记录数成功!"); } /** * 更新es查询的最大返回记录数 * * @param indexNameList index名称列表 * @param maxResult 要设置的最大返回记录数 */ public static void updateMaxResultSetting(List<String> indexNameList, long maxResult) { for (int i = 0; i < indexNameList.size(); i++) { String indexName = indexNameList.get(i); try { long curMaxResult = getMaxResultSetting(indexName); if (curMaxResult < maxResult) { updateMaxResultSetting(indexName, maxResult); } } catch (Exception e) { log.error("updateMaxResultSetting by indexNames eror, indexNam: " + indexName + " error: " + e.getMessage()); } } } /** * 获取es查询的最大返回记录数 * * @param indexName index * @return 设置的查询最大返回记录数 */ public static long getMaxResultSetting(String indexName) { log.info("[updateSetting] 开始获取es查询最大返回记录数"); Client client = ElasticSearchManager.getClient(); GetSettingsRequest request = new GetSettingsRequest().indices(indexName); request.names("index.max_result_window"); ActionFuture<GetSettingsResponse> future = client.admin().indices().getSettings(request); GetSettingsResponse response = future.actionGet(); String numberOfMaxResult = response.getSetting(indexName, "index.max_result_window"); return Long.parseLong(numberOfMaxResult); } /** * 资产查询 * @param indexName * @param 
queryBean * @param pageNo * @param pageSize * @return */ public static Map<String, Object> assetESQuery(String type,String sort,String order,Map<String, Object> map,int pageNo, int pageSize){ Client client = ElasticSearchManager.getClient(); //如果不存在则为空数据 if (!isExistsIndexAndType(client, "cems", type)) { JSONArray array = new JSONArray(); Map<String, Object> dataMap = new HashMap<String, Object>(); dataMap.put("data", array); dataMap.put("total", 0); return dataMap; } SearchRequestBuilder searchRequestBuilder = client.prepareSearch("cems").setTypes(type); BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); for (String key : map.keySet()) { if (key.indexOf("*") != -1) { boolQueryBuilder.must(QueryBuilders.termsQuery(key.replace("*","").trim(), map.get(key).toString())); } else { if((key.startsWith("begin") || key.startsWith("end")) && key.endsWith("Time")){ continue; } if("rangeTime".equals(map.get(key))){ Long beginTime = (Long)map.get("begin"+key); Long endTime = (Long)map.get("end"+key); boolQueryBuilder.must(QueryBuilders.rangeQuery(key).from(beginTime).to(endTime)); }else{ boolQueryBuilder.must(QueryBuilders.wildcardQuery(key, "*"+map.get(key).toString()+"*")); } } } searchRequestBuilder.setQuery(boolQueryBuilder); if (StringUtils.isNotBlank(sort)) { searchRequestBuilder.addSort(sort, order == null || "desc".equals(order) ? 
SortOrder.DESC : SortOrder.ASC); } searchRequestBuilder.setFrom((pageNo-1)*pageSize).setSize(pageSize); JSONArray array = new JSONArray(); Map<String, Object> dataMap = new HashMap<String, Object>(); try { SearchResponse response = searchRequestBuilder.get(); SearchHits hits = response.getHits(); long total = hits.getTotalHits(); // 获取配置信息--查询最大返回记录数 long maxResult = getMaxResultSetting("cems"); // 如果设置的最大返回值小于数据记录数 if(maxResult < total){ // 更新es查询的最大返回记录数, 扩大5倍 updateMaxResultSetting("cems", total / 10000 * 5 * 10000); } for(int i = 0; i < hits.getHits().length; i++) { SearchHit searchHit = hits.getHits()[i]; JSONObject object = JSONObject.fromObject(searchHit.getSourceAsString()); array.add(object); } dataMap.put("data", array); dataMap.put("total", total); } catch (Exception e) { log.error("[cascadeESQuery] es查询出错, error:" + e.getMessage()); } return dataMap; }