Bootstrap

MongoDB - 组合聚合阶段:$group、$match、$limit、$sort、$skip、$project、$count


根据工作中常见的业务需求,构造了一些场景来练习 mongodb 聚合阶段的使用。

1. $group

$group 根据单个字段对文档进行分组。

构造测试数据:

db.sales.drop()

db.sales.insertMany([
    { "_id": 1, "product": "A", "category": "Electronics", "quantity": 10, "price": 100 },
    { "_id": 2, "product": "B", "category": "Electronics", "quantity": 5, "price": 200 },
	{ "_id": 3, "product": "C", "category": "Electronics", "quantity": 5, "price": 300 },
	{ "_id": 4, "product": "D", "category": "Electronics", "quantity": 10, "price": 500 },
    { "_id": 5, "product": "A", "category": "Clothing", "quantity": 8, "price": 500},
    { "_id": 6, "product": "B", "category": "Clothing", "quantity": 12, "price": 200 },
    { "_id": 7, "product": "C", "category": "Clothing", "quantity": 8, "price": 600 },
    { "_id": 8, "product": "D", "category": "Clothing", "quantity": 12, "price": 700 }
])

根据 category 字段对文档进行分组并计算每个分组内文档的数量:

db.sales.aggregate([
  {
    $group : {
       _id : "$category",
       count: { $sum: 1 }
    }
  }
])

执行 $group 聚合阶段后输出的文档:

// 1
{
    "_id": "Clothing",
    "count": 4
}

// 2
{
    "_id": "Electronics",
    "count": 4
}

SpringBoot整合MongoDB实现:

// 输入文档
@Data
@Document(collection = "sales")
public class Sales {
    @MongoId
    private int _id;
    private String product;
    private String category;
    private int quantity;
    private int price;
}

/**
 * Output document: one row produced by the {@code $group} stage.
 *
 * <p>The pipeline groups by {@code category}, so the group key stored in
 * {@code _id} is the category string (for example "Clothing"), not a number.
 */
@Data
public class AggregationResult {
    // Group key emitted by $group; holds the category value, hence a String.
    private String _id;
    // Number of documents in the group (accumulated with $sum: 1).
    private int count;
}

// 聚合操作
@Test
public void aggregateTest() {
    // $group 聚合阶段
    GroupOperation group = Aggregation.group("category").count().as("count");

    // 组合聚合阶段
    Aggregation aggregation = Aggregation.newAggregation(group);

    // 执行聚合查询
    AggregationResults<AggregationResult> results
        = mongoTemplate.aggregate(aggregation, Sales.class, AggregationResult.class);
    List<AggregationResult> mappedResults = results.getMappedResults();

    // 打印结果
    mappedResults.forEach(System.out::println);
    //AggregationResult(_id=Clothing, count=4)
    //AggregationResult(_id=Electronics, count=4)
}

2. $group-> $project

$group 单字段分组 + $project 排除字段 + $project 重命名字段

构造测试数据:

db.sales.drop()

db.sales.insertMany([
    { "_id": 1, "product": "A", "category": "Electronics", "quantity": 10, "price": 100 },
    { "_id": 2, "product": "B", "category": "Electronics", "quantity": 5, "price": 200 },
	{ "_id": 3, "product": "C", "category": "Electronics", "quantity": 5, "price": 300 },
	{ "_id": 4, "product": "D", "category": "Electronics", "quantity": 10, "price": 500 },
    { "_id": 5, "product": "A", "category": "Clothing", "quantity": 8, "price": 500},
    { "_id": 6, "product": "B", "category": "Clothing", "quantity": 12, "price": 200 },
    { "_id": 7, "product": "C", "category": "Clothing", "quantity": 8, "price": 600 },
    { "_id": 8, "product": "D", "category": "Clothing", "quantity": 12, "price": 700 }
])
2.1 $group

执行 $group 聚合阶段后输出的文档:

db.sales.aggregate([
  {
    $group : {
       _id : "$category",
       count: { $sum: 1 }
    }
  }
])
// 1
{
    "_id": "Clothing",
    "count": 4
}

// 2
{
    "_id": "Electronics",
    "count": 4
}
2.2 $group-> $project

执行 $group + $project 聚合阶段后输出的文档:

db.sales.aggregate([
  // $group阶段:将聚合管道内的文档按照category分组,并计算分组内的文档数量
  {
    $group : {
       _id : "$category",
       count: { $sum: 1 }
    }
  },
  // $project阶段:将聚合管道内的文档排除_id字段,并将count字段重命名为newCount字段
  {
    $project : {
       _id : 0,
       newCount: "$count"
    }
  }
])
// 1
{
    "newCount": 4
}

// 2
{
    "newCount": 4
}
2.3 SpringBoot 整合 MongoDB
// 输入文档实体类
@Data
@Document(collection = "sales")
public class Sales {
    @Id
    private int _id;
    private String product;
    private String category;
    private int quantity;
    private int price;
}

/**
 * Output document entity: one row produced by the {@code $group -> $project} pipeline.
 *
 * <p>{@code $project} drops {@code _id} and renames {@code count} to {@code newCount},
 * so the only remaining field is the numeric document count of each group.
 */
@Data
public class AggregationResult {
    // Per-group document count ($sum: 1 renamed by $project); numeric, so mapped as int
    // like the count fields of the other result entities in this article.
    private int newCount;
}

// 聚合操作
@SpringBootTest
@RunWith(SpringRunner.class)
public class BeanLoadServiceTest {

    @Autowired
    private MongoTemplate mongoTemplate;

    @Test
    public void aggregateTest() {
        // $group 聚合阶段
        GroupOperation group = Aggregation.group("category").count().as("count");

        // $project 聚合阶段
        ProjectionOperation project = Aggregation.project().andExclude("_id").and("count").as("newCount");

        // 组合聚合阶段
        Aggregation aggregation = Aggregation.newAggregation(group,project);

        // 执行聚合查询
        AggregationResults<AggregationResult> results
                = mongoTemplate.aggregate(aggregation, Sales.class, AggregationResult.class);
        List<AggregationResult> mappedResults = results.getMappedResults();

        // 打印结果
        mappedResults.forEach(System.out::println);
        //AggregationResult(newCount=4)
		//AggregationResult(newCount=4)
    }
}

3. $match-> $group -> $match

$match 根据条件筛选文档+ $group 根据单字段分组文档 + $match 筛选分组后的文档

构造测试数据:

db.sales.drop()

db.sales.insertMany([
    { "_id": 1, "product": "A", "category": "Electronics", "quantity": 10, "price": 100 },
    { "_id": 2, "product": "B", "category": "Electronics", "quantity": 5, "price": 200 },
	{ "_id": 3, "product": "C", "category": "Electronics", "quantity": 5, "price": 300 },
	{ "_id": 4, "product": "D", "category": "Electronics", "quantity": 10, "price": 500 },
    { "_id": 5, "product": "A", "category": "Clothing", "quantity": 8, "price": 500},
    { "_id": 6, "product": "B", "category": "Clothing", "quantity": 12, "price": 200 },
    { "_id": 7, "product": "C", "category": "Clothing", "quantity": 8, "price": 600 },
    { "_id": 8, "product": "D", "category": "Clothing", "quantity": 12, "price": 700 }
])
3.1 $match

执行 $match 聚合阶段输出的文档为:

db.sales.aggregate([
  // 第一阶段:筛选出 price>=300 的文档
  {
    $match : {
       "price": { $gte: 300 }
    }
  }
])
// 1
{
    "_id": 3,
    "product": "C",
    "category": "Electronics",
    "quantity": 5,
    "price": 300
}

// 2
{
    "_id": 4,
    "product": "D",
    "category": "Electronics",
    "quantity": 10,
    "price": 500
}

// 3
{
    "_id": 5,
    "product": "A",
    "category": "Clothing",
    "quantity": 8,
    "price": 500
}

// 4
{
    "_id": 7,
    "product": "C",
    "category": "Clothing",
    "quantity": 8,
    "price": 600
}

// 5
{
    "_id": 8,
    "product": "D",
    "category": "Clothing",
    "quantity": 12,
    "price": 700
}
3.2 $match-> $group

执行 $match + $group 聚合阶段后输出的文档为:

db.sales.aggregate([
  // 第一阶段:筛选出 price>=300 的文档
  {
    $match : {
       "price": { $gte: 300 }
    }
  },
  // 第二阶段:将聚合管道内的文档按照category分组,并计算分组内的文档数量
  {
    $group : {
       _id : "$category",
       count: { $sum: 1 }
    }
  }
])
// 1
{
    "_id": "Clothing",
    "count": 3
}

// 2
{
    "_id": "Electronics",
    "count": 2
}
3.3 $match-> $group-> $match

执行 $match + $group + $match 聚合阶段后输出的文档为:

db.sales.aggregate([
  // 第一阶段:筛选出 price>=300 的文档
  {
    $match : {
       "price": { $gte: 300 }
    }
  },
  // 第二阶段:将聚合管道内的文档按照category分组,并计算分组内的文档数量
  {
    $group : {
       _id : "$category",
       count: { $sum: 1 }
    }
  },
  // 第三阶段:筛选出 count>=3 的文档
  {
    $match : {
       "count": { $gte: 3 }
    }
  }
])
// 1
{
    "_id": "Clothing",
    "count": 3
}
3.4 SpringBoot 整合 MongoDB
// 输入文档实体
@Data
@Document(collection = "sales")
public class Sales {
    @Id
    private int _id;
    private String product;
    private String category;
    private int quantity;
    private int price;
}

// 输出文档实体
@Data
public class AggregationResult {
    private String _id;
    private int count;
}

// 执行聚合阶段
@SpringBootTest
@RunWith(SpringRunner.class)
public class BeanLoadServiceTest {

    @Autowired
    private MongoTemplate mongoTemplate;

    @Test
    public void aggregateTest() {
        // $match 聚合阶段
        MatchOperation match1 = Aggregation.match(Criteria.where("price").gte(300));

        // $group 聚合阶段
        GroupOperation group = Aggregation.group("category").count().as("count");

        // $match 聚合阶段
        MatchOperation match2 = Aggregation.match(Criteria.where("count").gte(3));

        // 组合聚合阶段
        Aggregation aggregation = Aggregation.newAggregation(match1,group,match2);

        // 执行聚合查询
        AggregationResults<AggregationResult> results
                = mongoTemplate.aggregate(aggregation, Sales.class, AggregationResult.class);
        List<AggregationResult> mappedResults = results.getMappedResults();

        // 打印结果
        mappedResults.forEach(System.out::println);
        //AggregationResult(_id=Clothing, count=3)
    }
}

4. $match-> $group-> $project-> $sort-> $skip-> $limit

$match 根据条件筛选文档 + $group 根据单字段分组文档 + $project 重命名字段 + $sort 对文档按照 count 字段排序 + $skip 跳过指定数量的文档 + $limit 限制输出文档的数量

构造测试数据:

db.sales.drop()

db.sales.insertMany([
    { "_id": 1, "product": "C", "category": "Electronics", "quantity": 10, "price": 100 },
    { "_id": 2, "product": "A", "category": "Electronics", "quantity": 5, "price": 200 },
	{ "_id": 3, "product": "A", "category": "Electronics", "quantity": 5, "price": 300 },
	{ "_id": 4, "product": "D", "category": "Electronics", "quantity": 10, "price": 500 },
    { "_id": 5, "product": "A", "category": "Clothing", "quantity": 8, "price": 500},
    { "_id": 6, "product": "B", "category": "Clothing", "quantity": 12, "price": 200 },
    { "_id": 7, "product": "B", "category": "Clothing", "quantity": 8, "price": 600 },
    { "_id": 8, "product": "C", "category": "Clothing", "quantity": 12, "price": 700 }
])
4.1 $match

执行 $match 聚合阶段输出的文档为:

db.sales.aggregate([
  // $match 阶段:筛选出 price>100 的文档
  {
    $match : {
       "price": { $gt: 100 }
    }
  }
])
// 1
{
    "_id": 2,
    "product": "A",
    "category": "Electronics",
    "quantity": 5,
    "price": 200
}

// 2
{
    "_id": 3,
    "product": "A",
    "category": "Electronics",
    "quantity": 5,
    "price": 300
}

// 3
{
    "_id": 4,
    "product": "D",
    "category": "Electronics",
    "quantity": 10,
    "price": 500
}

// 4
{
    "_id": 5,
    "product": "A",
    "category": "Clothing",
    "quantity": 8,
    "price": 500
}

// 5
{
    "_id": 6,
    "product": "B",
    "category": "Clothing",
    "quantity": 12,
    "price": 200
}

// 6
{
    "_id": 7,
    "product": "B",
    "category": "Clothing",
    "quantity": 8,
    "price": 600
}

// 7
{
    "_id": 8,
    "product": "C",
    "category": "Clothing",
    "quantity": 12,
    "price": 700
}
4.2 $match-> $group

执行 $match + $group 聚合阶段输出的文档为:

db.sales.aggregate([
  // $match阶段:筛选出 price>100 的文档
  {
    $match : {
       "price": { $gt: 100 }
    }
  },
  // $group阶段:将聚合管道内的文档按照product分组,并计算分组内的文档数量
  {
    $group : {
       _id : "$product",
       count: { $sum: 1 }
    }
  }
])
// 1
{
    "_id": "C",
    "count": 1
}

// 2
{
    "_id": "D",
    "count": 1
}

// 3
{
    "_id": "B",
    "count": 2
}

// 4
{
    "_id": "A",
    "count": 3
}
4.3 $match-> $group-> $project

执行$match + $group + $project 聚合阶段输出的文档为:

db.sales.aggregate([
  // $match阶段:筛选出 price>100 的文档
  {
    $match : {
       "price": { $gt: 100 }
    }
  },
  // $group阶段:将聚合管道的文档按照product分组,并计算分组内的文档数量
  {
    $group : {
       _id : "$product",
       count: { $sum: 1 }
    }
  },
  // $project阶段:输出文档排除_id字段,包含count字段,并将_id字段重命名为product字段
  {
    $project : {
       _id:0,
       count: 1,
       product: "$_id"
    }
  }
])
// 1
{
    "count": 1,
    "product": "C"
}

// 2
{
    "count": 1,
    "product": "D"
}

// 3
{
    "count": 2,
    "product": "B"
}

// 4
{
    "count": 3,
    "product": "A"
}
4.4 $match-> $group-> $project-> $sort

执行$match + $group + $project + $sort 聚合阶段输出的文档为:

db.sales.aggregate([
  // $match阶段:筛选出 price>100 的文档
  {
    $match : {
       "price": { $gt: 100 }
    }
  }, 
  // $group阶段:将聚合管道的文档按照product分组,并计算分组内的文档数量
  {
    $group : {
       _id : "$product",
       count: { $sum: 1 }
    }
  },
  // $project阶段:将聚合管道内的文档排除_id字段,包含count字段,并将_id字段重命名为product字段
  {
    $project : {
       _id:0,
       count: 1,
       product: "$_id"
    }
  },
  // $sort阶段:将聚合管道内的文档按照count字段降序排序
  {
    $sort : {
       count:-1
    }
  }
])
// 1
{
    "count": 3,
    "product": "A"
}

// 2
{
    "count": 2,
    "product": "B"
}

// 3
{
    "count": 1,
    "product": "C"
}

// 4
{
    "count": 1,
    "product": "D"
}
4.5 $match-> $group-> $project-> $sort-> $skip
db.sales.aggregate([
  // $match阶段:筛选出 price>100 的文档
  {
    $match : {
       "price": { $gt: 100 }
    }
  }, 
  // $group阶段:将聚合管道的文档按照product分组,并计算分组内的文档数量
  {
    $group : {
       _id : "$product",
       count: { $sum: 1 }
    }
  },
  // $project阶段:将聚合管道内的文档排除_id字段,包含count字段,并将_id字段重命名为product字段
  {
    $project : {
       _id:0,
       count: 1,
       product: "$_id"
    }
  },
  // $sort阶段:将聚合管道内的文档按照count字段降序排序
  {
    $sort : {
       count:-1
    }
  },
  // $skip阶段:跳过聚合管道的前2个文档并输出
  {
    $skip: 2
  }
])
// 1
{
    "count": 1,
    "product": "C"
}

// 2
{
    "count": 1,
    "product": "D"
}
4.5 $match-> $group-> $project-> $sort-> $skip-> $limit

执行 $match + $group + $project + $sort + $skip + $limit 聚合阶段输出的文档为:

db.sales.aggregate([
  // $match阶段:筛选出 price>100 的文档
  {
    $match : {
       "price": { $gt: 100 }
    }
  }, 
  // $group阶段:将聚合管道的文档按照product分组,并计算分组内的文档数量
  {
    $group : {
       _id : "$product",
       count: { $sum: 1 }
    }
  },
  // $project阶段:将聚合管道内的文档排除_id字段,包含count字段,并将_id字段重命名为product字段
  {
    $project : {
       _id:0,
       count: 1,
       product: "$_id"
    }
  },
  // $sort阶段:将聚合管道内的文档按照count字段降序排序
  {
    $sort : {
       count:-1
    }
  },
  // $skip阶段:跳过聚合管道的前2个文档并输出
  {
    $skip: 2
  },
  // $limit阶段:仅输出聚合管道内的前1个文档
  {
    $limit: 1
  }
])
// 1
{
    "count": 1,
    "product": "C"
}
4.6 SpringBoot 整合 MongoDB
// 输入文档实体类
@Data
@Document(collection = "sales")
public class Sales {
    @Id
    private int _id;
    private String product;
    private String category;
    private int quantity;
    private int price;
}

// 输出文档实体类
@Data
public class AggregationResult {
    private int count;
    private String product;
}

// 执行聚合操作
@SpringBootTest
@RunWith(SpringRunner.class)
public class BeanLoadServiceTest {

    @Autowired
    private MongoTemplate mongoTemplate;

    @Test
    public void aggregateTest() {
        // $match 聚合阶段
        MatchOperation match = Aggregation.match(Criteria.where("price").gt(100));

        // $group 聚合阶段
        GroupOperation group = Aggregation.group("product").count().as("count");

        // $project 聚合阶段
        ProjectionOperation project = Aggregation.project("count").andExclude("_id").and("$_id").as("product");

        // $sort聚合阶段
        SortOperation sort = Aggregation.sort(Sort.Direction.DESC, "count");

        // $skip 聚合阶段
        SkipOperation skip = Aggregation.skip(2);

        // $limit 聚合阶段
        LimitOperation limit = Aggregation.limit(1);

        // 组合聚合阶段
        Aggregation aggregation = Aggregation.newAggregation(match,group,project,sort,skip,limit);

        // 执行聚合查询
        AggregationResults<AggregationResult> results
                = mongoTemplate.aggregate(aggregation, Sales.class, AggregationResult.class);
        List<AggregationResult> mappedResults = results.getMappedResults();

        // 打印结果
        mappedResults.forEach(System.out::println);
        //AggregationResult(count=1, product=C)
    }
}

5. $group-> $project

构造测试数据:

db.sales.drop()

db.sales.insertMany([
    { "_id": 1, "product": "C", "category": "Electronics", "quantity": 10, "price": 100 },
    { "_id": 2, "product": "A", "category": "Electronics", "quantity": 5, "price": 200 },
	{ "_id": 3, "product": "A", "category": "Electronics", "quantity": 5, "price": 300 },
	{ "_id": 4, "product": "D", "category": "Electronics", "quantity": 10, "price": 500 },
    { "_id": 5, "product": "A", "category": "Clothing", "quantity": 8, "price": 500},
    { "_id": 6, "product": "B", "category": "Clothing", "quantity": 12, "price": 200 },
    { "_id": 7, "product": "B", "category": "Clothing", "quantity": 8, "price": 600 },
    { "_id": 8, "product": "C", "category": "Clothing", "quantity": 12, "price": 700 }
])
5.1 $group 多字段分组聚合

$group 根据 category 和 product 字段分组后输出的文档为:

db.sales.aggregate([
  {
    // $group聚合阶段:将输入文档按照category和product字段分组
    $group: {
      _id: {
        category: "$category",
        product: "$product"
      },
      count: { $sum: 1 }
    }
  }
])
// 1
{
    "_id": {
        "category": "Clothing",
        "product": "C"
    },
    "count": 1
}

// 2
{
    "_id": {
        "category": "Clothing",
        "product": "B"
    },
    "count": 2
}

// 3
{
    "_id": {
        "category": "Clothing",
        "product": "A"
    },
    "count": 1
}

// 4
{
    "_id": {
        "category": "Electronics",
        "product": "A"
    },
    "count": 2
}

// 5
{
    "_id": {
        "category": "Electronics",
        "product": "D"
    },
    "count": 1
}

// 6
{
    "_id": {
        "category": "Electronics",
        "product": "C"
    },
    "count": 1
}
5.2 $group-> $project

执行 $group + $project 聚合阶段后输出的文档为:

db.sales.aggregate([
  // $group聚合阶段:将输入文档按照category和product字段分组
  {
    $group: {
      _id: {
        category: "$category",
        product: "$product"
      },
      count: { $sum: 1 }
    }
  },
  // $project聚合阶段:将_id.category重命名为category,将_id.product重命名为product,包含count字段,排除_id字段
  {
    $project: {
    	category: "$_id.category",
    	product: "$_id.product",
    	count: 1,
    	_id: 0
    }
  }
])
// 1
{
    "count": 1,
    "category": "Clothing",
    "product": "C"
}

// 2
{
    "count": 2,
    "category": "Clothing",
    "product": "B"
}

// 3
{
    "count": 1,
    "category": "Clothing",
    "product": "A"
}

// 4
{
    "count": 2,
    "category": "Electronics",
    "product": "A"
}

// 5
{
    "count": 1,
    "category": "Electronics",
    "product": "D"
}

// 6
{
    "count": 1,
    "category": "Electronics",
    "product": "C"
}
5.3 $group-> $project-> $sort

执行 $group + $project + $sort 聚合阶段后输出的文档为:

db.sales.aggregate([
  // $group聚合阶段:将输入文档按照category和product字段分组
  {
    $group: {
      _id: {
        category: "$category",
        product: "$product"
      },
      count: { $sum: 1 }
    }
  },
  // $project聚合阶段:将_id.category重命名为category,将_id.product重命名为product,包含count字段,排除_id字段
  {
    $project: {
    	category: "$_id.category",
    	product: "$_id.product",
    	count: 1,
    	_id: 0
    }
  },
  // $sort聚合阶段:将聚合管道内的文档按照count字段升序排序
  {
    $sort: {
    	count:1
    }
  }
])
// 1
{
    "count": 1,
    "category": "Clothing",
    "product": "C"
}

// 2
{
    "count": 1,
    "category": "Clothing",
    "product": "A"
}

// 3
{
    "count": 1,
    "category": "Electronics",
    "product": "D"
}

// 4
{
    "count": 1,
    "category": "Electronics",
    "product": "C"
}

// 5
{
    "count": 2,
    "category": "Clothing",
    "product": "B"
}

// 6
{
    "count": 2,
    "category": "Electronics",
    "product": "A"
}
5.4 $group-> $project-> $sort-> $limit

执行 $group + $project + $sort + $limit 聚合阶段后输出的文档为:

db.sales.aggregate([
  // $group聚合阶段:将输入文档按照category和product字段分组
  {
    $group: {
      _id: {
        category: "$category",
        product: "$product"
      },
      count: { $sum: 1 }
    }
  },
  // $project聚合阶段:将_id.category重命名为category,将_id.product重命名为product,包含count字段,排除_id字段
  {
    $project: {
    	category: "$_id.category",
    	product: "$_id.product",
    	count: 1,
    	_id: 0
    }
  },
  // $sort聚合阶段:将聚合管道内的文档按照count字段升序排序
  {
    $sort: {
    	count:1
    }
  },
  // $limit聚合阶段:仅输出聚合管道内的前2个文档
  {
    $limit:2
  }
])
// 1
{
    "count": 1,
    "category": "Clothing",
    "product": "A"
}

// 2
{
    "count": 1,
    "category": "Clothing",
    "product": "C"
}
5.5 SpringBoot 整合 MongoDB
// 输入文档实体类
@Data
@Document(collection = "sales")
public class Sales {
    @Id
    private int _id;
    private String product;
    private String category;
    private int quantity;
    private int price;
}

// 输出文档实体类
@Data
public class AggregationResult {
    private int count;
    private String product;
    private String category;
}

/**
 * Runs the {@code $group -> $project -> $sort -> $limit} pipeline of section 5.4
 * through {@link MongoTemplate} and prints the mapped results.
 */
@SpringBootTest
@RunWith(SpringRunner.class)
public class BeanLoadServiceTest {

    @Autowired
    private MongoTemplate mongoTemplate;

    @Test
    public void aggregateTest() {
        // $group stage: group by the (category, product) pair and count documents per group
        GroupOperation group = Aggregation.group("category","product").count().as("count");

        // $project stage: keep count, drop _id, and lift the two parts of the
        // compound group key to top-level category / product fields
        ProjectionOperation project = Aggregation.project("count").andExclude("_id")
                .and("$_id.category").as("category")
                .and("$_id.product").as("product");

        // $sort stage: ascending by count, matching the shell pipeline's { $sort: { count: 1 } }
        SortOperation sort = Aggregation.sort(Sort.Direction.ASC, "count");

        // $limit stage: emit only the first 2 documents
        LimitOperation limit = Aggregation.limit(2);

        // Assemble the stages in pipeline order
        Aggregation aggregation = Aggregation.newAggregation(group,project,sort,limit);

        // Execute the aggregation against the sales collection
        AggregationResults<AggregationResult> results
                = mongoTemplate.aggregate(aggregation, Sales.class, AggregationResult.class);
        List<AggregationResult> mappedResults = results.getMappedResults();

        // Print the results. Four groups tie at count=1, and the sort key does not
        // break the tie, so which two of them are returned may vary between runs.
        mappedResults.forEach(System.out::println);
        //AggregationResult(count=1, product=A, category=Clothing)
        //AggregationResult(count=1, product=C, category=Clothing)
    }
}
;