Bootstrap

ElasticSearch easy-es 聚合函数 group by 混合写法求Top N 词云 分词

1. 将用户访问记录表的数据同步到 ES 并进行分词,统计出用户访问次数最多的前十个词语。

Elasticsearch、Easy-es 快速入门 SearchAfterPage分页 若依前后端分离 Ruoyi-Vue SpringBoot

使用结巴分词器

        <!-- Jieba word segmenter (tokenizer) -->
        <dependency>
            <groupId>com.huaban</groupId>
            <artifactId>jieba-analysis</artifactId>
            <version>1.0.2</version>
        </dependency>

初始化数据



    /**
     * Recreates both Elasticsearch indexes and fills them from the view-count table.
     *
     * <p>Calls {@code deleteIndex}/{@code createIndex} for {@code tz_ly_view_count} and
     * {@code tz_ly_view_count_word}, then reads view-count rows (left-joined with the user
     * table to pick up gender, ethnicity, political status, domain and area code) one page
     * at a time and passes each page to {@link #splitWordAndInsertEs(List)}.
     */
    public void createAndInitToES() {
        // Step 1: (re)create the indexes -- an index plays the role of a table in MySQL.
        esTzLyViewCountMapper.deleteIndex("tz_ly_view_count");
        esTzLyViewCountMapper.createIndex();
        esTzLyViewCountWordFrequencyMapper.deleteIndex("tz_ly_view_count_word");
        esTzLyViewCountWordFrequencyMapper.createIndex();

        // All view-count columns plus the selected user attributes from the joined user row.
        MPJLambdaWrapper<TzLyViewCount> query = JoinWrappers.lambda(TzLyViewCount.class)
                .selectAll(TzLyViewCount.class)
                .selectAs("lyUser", TzLyUser::getGender, TzLyViewCount::getGender)
                .selectAs("lyUser", TzLyUser::getEthnicity, TzLyViewCount::getEthnicity)
                .selectAs("lyUser", TzLyUser::getPoliticalStatus, TzLyViewCount::getPoliticalStatus)
                .selectAs("lyUser", TzLyUser::getDomain, TzLyViewCount::getDomain)
                .selectAs("lyUser", TzLyUser::getAreaCode, TzLyViewCount::getAreaCode)
                .leftJoin(TzLyUser.class, "lyUser", on -> on.eq(TzLyViewCount::getViewUserId, TzLyUser::getId));

        // Limit the amount of data when running locally.
        query.between(TzLyViewCount::getCreatedDate, DateUtil.parse("2024-01-01 00:00:00"), DateUtil.parse("2024-02-01 00:00:00"));

        final int pageSize = 1000;
        for (int pageNo = 1; ; pageNo++) {
            PageUtils.startPage(pageNo, pageSize);
            List<TzLyViewCount> rows = tzLyViewCountMapper.selectList(query);
            // An empty page means every row has been read.
            if (CollectionUtil.isEmpty(rows)) {
                return;
            }

            // Convert the page of database rows into Elasticsearch documents.
            List<EsTzLyViewCount> docs = new ArrayList<>(rows.size() * 2);
            for (TzLyViewCount row : rows) {
                EsTzLyViewCount doc = new EsTzLyViewCount();
                BeanUtils.copyBeanProp(doc, row);
                docs.add(doc);
            }
            splitWordAndInsertEs(docs);
        }
    }

    /**
     * Segments each document's view title with Jieba and batch-inserts both document sets.
     *
     * <p>For every title token of at least two characters a word document is created by
     * copying the source document's properties and storing the token via
     * {@code setViewTitleWord}. Documents with a blank title yield no word documents.
     * The word documents are inserted first, then the source documents.
     *
     * @param esTzLyViewCountList view-count documents to segment and index
     */
    public void splitWordAndInsertEs(List<EsTzLyViewCount> esTzLyViewCountList) {
        JiebaSegmenter segmenter = new JiebaSegmenter();
        List<EsTzLyViewCountWord> wordDocs = new ArrayList<>();

        for (EsTzLyViewCount source : esTzLyViewCountList) {
            String title = source.getViewTitle();
            if (!StringUtils.isNotBlank(title)) {
                continue;
            }
            for (String token : segmenter.sentenceProcess(title)) {
                // Skip tokens shorter than two characters.
                if (token.length() < 2) {
                    continue;
                }
                EsTzLyViewCountWord wordDoc = new EsTzLyViewCountWord();
                BeanUtils.copyBeanProp(wordDoc, source);
                wordDoc.setViewTitleWord(token);
                wordDocs.add(wordDoc);
            }
        }

        esTzLyViewCountWordFrequencyMapper.insertBatch(wordDocs);
        esTzLyViewCountMapper.insertBatch(esTzLyViewCountList);
    }
easy-es 聚合函数 混合写法
 /**
     * Returns the ten most frequent view-title words (keywords of accessed information).
     *
     * <p>The aggregation is limited by {@code createdDate}: if both the begin and end
     * timestamps of the query object are present, that range is used; otherwise the range is
     * the whole calendar year containing {@code getQueryTimestamp()}. When the query timestamp
     * is also {@code null}, the current year is used instead of failing with a
     * {@link NullPointerException}.
     *
     * @param statisticsDateQueryVo query object supplying the optional begin/end timestamps
     *                              and the optional query timestamp
     * @return one single-entry map per terms bucket (word to document count), in the order
     *         the buckets are returned by Elasticsearch
     */
    public List<HashMap<String, Long>> accessInformationKeyword(StatisticsDateQueryVo statisticsDateQueryVo) {
        LambdaEsQueryWrapper<EsTzLyViewCountWord> wrapper = new LambdaEsQueryWrapper<>();

        // Only the aggregation result is needed, so do not return any documents.
        wrapper.size(0);

        // Restrict the documents that take part in the aggregation.
        if (ObjectUtils.isNotEmpty(statisticsDateQueryVo.getBgnTimestamp()) && StringUtils.isNotNull(statisticsDateQueryVo.getEndTimestamp())) {
            Date bgnTimestamp = new Date(statisticsDateQueryVo.getBgnTimestamp().getTime());
            Date endTimestamp = new Date(statisticsDateQueryVo.getEndTimestamp().getTime());
            wrapper.between(EsTzLyViewCountWord::getCreatedDate, bgnTimestamp, endTimestamp);
        } else {
            // Calendar.getInstance() starts at "now"; only move it when a query date was given,
            // so a missing query timestamp falls back to the current year.
            Calendar calendar = Calendar.getInstance();
            Date queryDate = statisticsDateQueryVo.getQueryTimestamp();
            if (queryDate != null) {
                calendar.setTime(queryDate);
            }

            // First instant of that year: January 1st, 00:00:00.000.
            calendar.set(Calendar.MONTH, Calendar.JANUARY);
            calendar.set(Calendar.DAY_OF_MONTH, 1);
            calendar.set(Calendar.HOUR_OF_DAY, 0);
            calendar.set(Calendar.MINUTE, 0);
            calendar.set(Calendar.SECOND, 0);
            calendar.set(Calendar.MILLISECOND, 0);
            Date firstDayOfYear = calendar.getTime();

            // Last instant of that year: December 31st, 23:59:59.999.
            calendar.set(Calendar.MONTH, Calendar.DECEMBER);
            calendar.set(Calendar.DAY_OF_MONTH, 31);
            calendar.set(Calendar.HOUR_OF_DAY, 23);
            calendar.set(Calendar.MINUTE, 59);
            calendar.set(Calendar.SECOND, 59);
            calendar.set(Calendar.MILLISECOND, 999);
            Date lastDayOfYear = calendar.getTime();

            wrapper.between(EsTzLyViewCountWord::getCreatedDate, firstDayOfYear, lastDayOfYear);
        }

        // Terms aggregation on the word field; no explicit order is set, so the
        // Elasticsearch default (descending document count) applies.
        AggregationBuilder aggregation = AggregationBuilders
                .terms("viewTitleWordAgg") // custom aggregation name
                .field("viewTitleWord")    // field to aggregate on
                .size(10);                 // keep the top ten buckets

        // Mixed query: take the builder generated from the wrapper, attach the native
        // aggregation, and hand it back to the wrapper before searching.
        SearchSourceBuilder searchSourceBuilder = esTzLyViewCountWordFrequencyMapper.getSearchSourceBuilder(wrapper);
        searchSourceBuilder.aggregation(aggregation);
        wrapper.setSearchSourceBuilder(searchSourceBuilder);
        SearchResponse searchResponse = esTzLyViewCountWordFrequencyMapper.search(wrapper);

        Terms viewTitleWordAgg = (Terms) searchResponse.getAggregations().getAsMap().get("viewTitleWordAgg");
        List<HashMap<String, Long>> list = new ArrayList<>(viewTitleWordAgg.getBuckets().size());
        for (Terms.Bucket bucket : viewTitleWordAgg.getBuckets()) {
            HashMap<String, Long> hashMap = new HashMap<>();
            hashMap.put(bucket.getKeyAsString(), bucket.getDocCount());
            list.add(hashMap);
        }
        return list;
    }
;