• 周二. 8月 16th, 2022

5G编程聚合网

5G时代下一个聚合的编程学习网

热门标签

mongodb聚合操作$使用例子

admin

11月 28, 2021

$操作符使用

$操作符一般用作

== index a ==

1. $addField 增加一个字段

使用场景一:查询的结果中增加字段

数据类型如下

{
   "_id" : ObjectId("610cd5a0702aac3e62a77a13"),
   "name" : "zhang",
   "intro" : "a"
}

/* 2 */
{
   "_id" : ObjectId("610cd5a4702aac3e62a77a14"),
   "name" : "zhang",
   "intro" : "b"
}

/* 3 */
{
   "_id" : ObjectId("610cd5a6702aac3e62a77a15"),
   "name" : "zhang",
   "intro" : "b"
}

demo1:返回的结果中增加一个字段,保存执行的值

match_dict = {"$match":{"name": "zhang"}}
add_dict = {"$addFields": {"intro_list":"$intro"}}
ret = db["test"].aggregate([match_dict,add_dict])
print(list(ret))
# 结果
[{'_id': ObjectId('610cd5a0702aac3e62a77a13'), 'name': 'zhang', 'intro': 'a', 'intro_list': 'a'}, {'_id': ObjectId('610cd5a4702aac3e62a77a14'), 'name': 'zhang', 'intro': 'b', 'intro_list': 'b'}, {'_id': ObjectId('610cd5a6702aac3e62a77a15'), 'name': 'zhang', 'intro': 'b', 'intro_list': 'b'}]

demo2:将一个值类型转换后,保存到增加的字段

match_dict = {"$match": {"name": "zhang"}}
# 将查询结果的值进行转换,保存到一个指定的字段中,多用于lookup时候。
add_dict = {"$addFields": {"str_id": {"$toString":"$_id"}}}
ret = db["test"].aggregate([match_dict, add_dict])
# 结果
[{'_id': ObjectId('610cd5a0702aac3e62a77a13'), 'name': 'zhang', 'intro': 'a', 'str_id': '610cd5a0702aac3e62a77a13'}, {'_id': ObjectId('610cd5a4702aac3e62a77a14'), 'name': 'zhang', 'intro': 'b', 'str_id': '610cd5a4702aac3e62a77a14'}, {'_id': ObjectId('610cd5a6702aac3e62a77a15'), 'name': 'zhang', 'intro': 'b', 'str_id': '610cd5a6702aac3e62a77a15'}]

ObjectId转换为str类型

# 以前都是查出结果后,循环str将id从Object转换为str,现在可以一步到位
add_dict = {"$addFields": {"_id": {"$toString":"$_id"}}}
# 结果为
[{'_id': '610cd5a0702aac3e62a77a13', 'name': 'zhang', 'intro': 'a'}, {'_id': '610cd5a4702aac3e62a77a14', 'name': 'zhang', 'intro': 'b'}, {'_id': '610cd5a6702aac3e62a77a15', 'name': 'zhang', 'intro': 'b'}]

数据类型如下

/* 1 */
{
    "_id" : ObjectId("610cd5a0702aac3e62a77a13"),
    "name" : "china",
    "city_gdp" : [ 
        {
            "city" : "beijing",
            "gdp" : 100
        }, 
        {
            "city" : "shanghai",
            "gdp" : 200
        }, 
        {
            "city" : "tianjin",
            "gdp" : 300
        }, 
        {
            "city" : "chongqing",
            "gdp" : 400
        }
    ]
}

demo3:求gdp的总和和平均值

match_dict = {"$match": {"name": "china"}}
# 增加两个字段:保存总和和平均值
add_dict = {"$addFields": {"total": {"$sum":"$city_gdp.gdp"},"avg":{"$avg":"$city_gdp.gdp"}}}
ret = db["test"].aggregate([match_dict, add_dict])
print(list(ret))
# 结果
[{'_id': ObjectId('610cd5a0702aac3e62a77a13'), 'name': 'china', 'city_gdp': [{'city': 'beijing', 'gdp': 100}, {'city': 'shanghai', 'gdp': 200}, {'city': 'tianjin', 'gdp': 300}, {'city': 'chongqing', 'gdp': 400}], 'total': 1000, 'avg': 250.0}]

使用场景二:给原始数据增加字段—类似于修改表结构

数据结构如下

/* 2 */
{
    "_id" : ObjectId("610e97f48e28e168e81a2e62"),
    "company" : "apple",
    "computer" : 15000,
    "iphone" : 6000
}

/* 3 */
{
    "_id" : ObjectId("610e97fe8e28e168e81a2e87"),
    "company" : "huawei",
    "computer" : 10000,
    "iphone" : 8000
}

每条数据增加一个字段,名字为pad,默认初始值为3000

update_dict = {"$addFields": {"pad": 3000}}
# 注意:必须使用列表包裹更新条件,否则报错
ret = db["test"].update_many({}, [update_dict])
# 结果
/* 2 */
{
    "_id" : ObjectId("610e97f48e28e168e81a2e62"),
    "company" : "apple",
    "computer" : 15000,
    "iphone" : 6000,
    "pad" : 3000
}

/* 3 */
{
    "_id" : ObjectId("610e97fe8e28e168e81a2e87"),
    "company" : "huawei",
    "computer" : 10000,
    "iphone" : 8000,
    "pad" : 3000
}

2.$addToSet

使用方式一:作为列表修改器,一般和update方法搭配使用。
数据类型如下

{
    "_id" : ObjectId("610e91708e28e168e81a1c2f"),
    "name" : "xian",
    "foods" : [ 
        "肉夹馍"
    ]
}

给foods列表中添加数据,达到去重效果

ret = db["test"].update_one(filter={"name": "xian"}, update={"$addToSet": {"foods": "糖醋面皮"}})
# 结果---插入相同的值会直接去重,但是上面的写法缺点是每次只能添加一个元素
{
    "_id" : ObjectId("610e91708e28e168e81a1c2f"),
    "name" : "xian",
    "foods" : [ 
        "肉夹馍", 
        "糖醋面皮"
    ]
}

# 添加一组元素 --- 配合$each使用
ret = db["test"].update_one(filter={"name": "xian"}, update={"$addToSet": {"foods": {"$each":["麻辣烫夹馍","东关烧烤"]}}})
# 结果
{
    "_id" : ObjectId("610e91708e28e168e81a1c2f"),
    "name" : "xian",
    "foods" : [ 
        "肉夹馍", 
        "糖醋面皮", 
        "麻辣烫夹馍", 
        "东关烧烤"
    ]
}

使用方式二:去重的话,一般和group搭配使用

数据类型如下


{
    "_id" : ObjectId("610e8d0d8e28e168e81a1009"),
    "name" : "zhang",
    "intro" : "a"
}

/* 2 */
{
    "_id" : ObjectId("610e8d148e28e168e81a1023"),
    "name" : "zhang",
    "intro" : "a"
}

/* 3 */
{
    "_id" : ObjectId("610e8d1b8e28e168e81a103b"),
    "name" : "zhang",
    "intro" : "b"
}

查询intro值并去重

# 需要说明的是,$addToSet去重一般会和分组group一起使用
match_dict = {"$match": {"name": "zhang"}}
group_dict = {"$group": {"_id": None, "intro_list": {"$addToSet": "$intro"}}}
# 结果
[{'_id': None, 'intro_list': ['b', 'a']}]

3. $and

作用:逻辑与

4. $avg

作用:求平均

5. $add

作用:数学运算

数据类型如下

/* 2 */
{
    "_id" : ObjectId("610e97f48e28e168e81a2e62"),
    "company" : "apple",
    "computer" : 15000,
    "iphone" : 6000
}

/* 3 */
{
    "_id" : ObjectId("610e97fe8e28e168e81a2e87"),
    "company" : "huawei",
    "computer" : 10000,
    "iphone" : 8000
}

match_dict = {"$match": {"$or": [{"company": "apple"}, {"company": "huawei"}]}}
# project也可以增加字段,$add将不同字段的值进行相加
project_dict = {"$project": {"company": 1, "computer": 1, "iphone": 1, "total": {"$add": ["$computer", "$iphone"]}}}
ret = db["test"].aggregate([match_dict, project_dict])
print(list(ret))

# 结果
[{'_id': ObjectId('610e97f48e28e168e81a2e62'), 'company': 'apple', 'computer': 15000, 'iphone': 6000, 'total': 21000}, {'_id': ObjectId('610e97fe8e28e168e81a2e87'), 'company': 'huawei', 'computer': 10000, 'iphone': 8000, 'total': 18000}]


6. $all

作用:用于匹配数组元素

7. $abs

作用:求绝对值

数据

{
    "_id" : ObjectId("610ea57f8e28e168e81a5370"),
    "name" : "haerbin",
    "temp" : -10
}

返回绝对值后的数据

match_dict = {"$match": {"name": "haerbin"}}
# 利用project可以增加字段的性质,替换_id的值
project = {"$project": {"name": 1, "temp": 1, "abs_temp": {"$abs": "$temp"}, "_id": {"$toString": "$_id"}}}
ret = db["test"].aggregate([match_dict, project])
# 结果
[{'name': 'haerbin', 'temp': -10, 'abs_temp': 10, '_id': '610ea57f8e28e168e81a5370'}]

同样适用于find

# 完美解决find查询后处理_id类型转换的麻烦
ret = db["test"].find_one({"name": "haerbin"},{"name": 1, "temp": 1, "abs_temp": {"$abs": "$temp"}, "_id": {"$toString": "$_id"}})
# 结果:
{'name': 'haerbin', 'temp': -10, 'abs_temp': 10, '_id': '610ea57f8e28e168e81a5370'}

8. $arrayElemAt

作用:返回数组内指定下标的元素

数据类型如下:

{
    "_id" : ObjectId("610e91708e28e168e81a1c2f"),
    "name" : "xian",
    "foods" : [ 
        "肉夹馍", 
        "糖醋面皮", 
        "麻辣烫夹馍", 
        "东关烧烤"
    ],
    "pad" : 3000
}

返回foods下标为0的元素

match_dict = {"$match": {"name": "xian"}}
# 取foods列表中的第一个元素
project = {"$project": {"foods": 1, "pad": 1, "_id": {"$toString": "$_id"}, "foods_num1": {"$arrayElemAt":["$foods", 0]}}}
ret = db["test"].aggregate([match_dict, project])
# 结果
[{'foods': ['肉夹馍', '糖醋面皮', '麻辣烫夹馍', '东关烧烤'], 'pad': 3000, '_id': '610e91708e28e168e81a1c2f', 'foods_num1': '肉夹馍'}]

9. $arrayToObject

作用:将数组内的key-value元素转换为对象

使用场景一:将数据的字段A和字段B的值组成字典格式返回

数据类型如下

{
    "_id" : ObjectId("610e97f48e28e168e81a2e62"),
    "company" : "apple",
    "computer" : 15000,
    "iphone" : 6000,
    "pad" : 3000
}

/* 3 */
{
    "_id" : ObjectId("610e97fe8e28e168e81a2e87"),
    "company" : "huawei",
    "computer" : 10000,
    "iphone" : 8000,
    "pad" : 3000
}

输出,{“hawei”:8000},{“apple”:”6000″}

match_dict = {"$match": {"$or": [{"company": "apple"}, {"company": "huawei"}]}}
# push不能和project搭配使用。
group_dict = {"$group": {"_id": None, "list": {"$push": {"k": "$company", "v": "$pad"}}}}
# project = {"$project": {"computer": 1, "iphone": 1, "_id": {"$toString": "$_id"}, "pad": 1,"list": {"$push": {"k": "$company", "v": "$pad"}}}}
ret = db["test"].aggregate([match_dict, group_dict])
# 结果 
[{'_id': None, 'list': [{'k': 'apple', 'v': 3000}, {'k': 'huawei', 'v': 3000}]}]

match_dict = {"$match": {"$or": [{"company": "apple"}, {"company": "huawei"}]}}
group_dict = {"$group": {"_id": None, "list": {"$push": {"k": "$company", "v": "$iphone"}}}}
project = {"$project": {"return_dict": {"$arrayToObject": "$list"}}}
ret = db["test"].aggregate([match_dict, group_dict, project])
# 结果
[{'_id': None, 'return_dict': {'apple': 6000, 'huawei': 8000}}]

arrayToObject能够识别的类型如下

类型一:
[ [ “item”, “abc123”], [ “qty”, 25 ] ]
类型二:
[ { “k”: “item”, “v”: “abc123”}, { “k”: “qty”, “v”: 25 } ]

https://blog.csdn.net/qq_38923792/article/details/110390327

10. $accumulator

作用: 自定义累加器
TODO

11. $allElementsTrue

作用:判断集合中是否包含指定元素,如果数组中有null,0,undefine则返回false,空数组及其他值返回True。

数据类型如下

 5 */
{
    "_id" : ObjectId("610fbcacaa6673116fbbe472"),
    "s_id" : 1,
    "responses" : [ 
        true
    ]
}

/* 6 */
{
    "_id" : ObjectId("610fbcacaa6673116fbbe473"),
    "s_id" : 2,
    "responses" : [ 
        true, 
        false
    ]
}

/* 7 */
{
    "_id" : ObjectId("610fbcacaa6673116fbbe474"),
    "s_id" : 3,
    "responses" : []
}

/* 8 */
{
    "_id" : ObjectId("610fbcacaa6673116fbbe475"),
    "s_id" : 4,
    "responses" : [ 
        1, 
        true, 
        "seven"
    ]
}

/* 9 */
{
    "_id" : ObjectId("610fbcacaa6673116fbbe476"),
    "s_id" : 5,
    "responses" : [ 
        0
    ]
}

/* 10 */
{
    "_id" : ObjectId("610fbcacaa6673116fbbe477"),
    "s_id" : 6,
    "responses" : [ 
        []
    ]
}

/* 11 */
{
    "_id" : ObjectId("610fbcacaa6673116fbbe478"),
    "s_id" : 7,
    "responses" : [ 
        [ 
            0
        ]
    ]
}

/* 12 */
{
    "_id" : ObjectId("610fbcacaa6673116fbbe479"),
    "s_id" : 8,
    "responses" : [ 
        [ 
            true
        ]
    ]
}

/* 13 */
{
    "_id" : ObjectId("610fbcacaa6673116fbbe47a"),
    "s_id" : 9,
    "responses" : [ 
        null
    ]
}

/* 14 */
{
    "_id" : ObjectId("610fbcacaa6673116fbbe47b"),
    "s_id" : 10,
    "responses" : [ 
        false
    ]

判断response是否包含None,0,undefine的元素
当前版本:MONGO_VERSION=4.4.6
需要版本:MONGO_VERSION=5.0+

temp_dict = {"$project": {"responses": 1, "isAllTrue": {"$allElementsTrue": ["$responses"] }, "_id": 0 } }
ret = db["test"].aggregate([temp_dict])
报错:pymongo.errors.OperationFailure: $allElementsTrue's argument must be an array, but is missing, full error: {'operationTime': Timestamp(1628423017, 1), 'ok': 0.0, 'errmsg': "$allElementsTrue's argument must be an array, but is missing", 'code': 17040, 'codeName': 'Location17040', '$clusterTime': {'clusterTime': Timestamp(1628423017, 1), 'signature': {'hash': b'x00x00x00x00x00x00x00x00x00x00x00x00x00x00x00x00x00x00x00x00', 'keyId': 0}}}

12. $acos

作用:返回一个值的反余弦值

13. $avg

作用:求平均值

使用场景一:和project搭配使用

数据样式

/* 2 */
{
    "_id" : ObjectId("610e97f48e28e168e81a2e62"),
    "company" : "apple",
    "computer" : 15000,
    "iphone" : 6000,
    "pad" : 3000
}

/* 3 */
{
    "_id" : ObjectId("610e97fe8e28e168e81a2e87"),
    "company" : "huawei",
    "computer" : 10000,
    "iphone" : 8000,
    "pad" : 3000
}

求computer + iphone + pad 加起来的平均值

match_dict = {"$match": {"$or": [{"company": "huawei"}, {"company": "apple"}]}}
# 对多个数求平均值
project = {"$project": {"_id": {"$toString": "$_id"}, "avg_three": {"$avg": ["$computer", "$iphone", "$pad"]}}}
ret = db["test"].aggregate([match_dict, project])
print(list(ret))"avg_three": {"$avg": ["$computer", "$iphone", "$ipad"]}}}}
ret = db["test"].aggregate([match_dict, project])
# 结果
[{'_id': '610e97f48e28e168e81a2e62', 'avg_three': 8000.0}, {'_id': '610e97fe8e28e168e81a2e87', 'avg_three': 7000.0}]

使用场景二:和group搭配使用

数据格式如下

{ "_id" : 1, "item" : "abc", "price" : 10, "quantity" : 2, "date" : ISODate("2014-01-01T08:00:00Z") }
{ "_id" : 2, "item" : "jkl", "price" : 20, "quantity" : 1, "date" : ISODate("2014-02-03T09:00:00Z") }
{ "_id" : 3, "item" : "xyz", "price" : 5, "quantity" : 5, "date" : ISODate("2014-02-03T09:05:00Z") }
{ "_id" : 4, "item" : "abc", "price" : 10, "quantity" : 10, "date" : ISODate("2014-02-15T08:00:00Z") }
{ "_id" : 5, "item" : "xyz", "price" : 5, "quantity" : 10, "date" : ISODate("2014-02-15T09:12:00Z") }

官方例子代码

db.sales.aggregate(
   [
     {
       $group:
         {
           _id: "$item",
           avgAmount: { $avg: { $multiply: [ "$price", "$quantity" ] } },
           avgQuantity: { $avg: "$quantity" }
         }
     }
   ]
)

#  结果
{ "_id" : "xyz", "avgAmount" : 37.5, "avgQuantity" : 7.5 }
{ "_id" : "jkl", "avgAmount" : 20, "avgQuantity" : 1 }
{ "_id" : "abc", "avgAmount" : 60, "avgQuantity" : 6 }

== index b ==

1. $bucket

作用:分段统计数据

使用方式一:单独使用

数据格式如下

// Newly added document
{ 
    "name" : "t1", 
    "price" : 100.0, 
    "_id" : ObjectId("6110e53447e9154a103dc257")
}
// Newly added document
{ 
    "name" : "t2", 
    "price" : 200.0, 
    "_id" : ObjectId("6110e53847e9154a103dc258")
}
// Newly added document
{ 
    "name" : "t3", 
    "price" : 300.0, 
    "_id" : ObjectId("6110e53d47e9154a103dc259")
}
// Newly added document
{ 
    "name" : "t4", 
    "price" : 400.0, 
    "_id" : ObjectId("6110e54247e9154a103dc25a")
}
// Newly added document
{ 
    "name" : "t5", 
    "price" : 500.0, 
    "_id" : ObjectId("6110e54747e9154a103dc25b")
}

分组 — 一般group是按照字段进行分组,$bucket可以按照字段值进行判断分组

match = {"$match":{"_id":{"$ne":""}}}
bucket =  {
    "$bucket": {
      # 分组
      "groupBy": "$price", 
      # 边界,200<=x<400 有几个边界条件,就会分几组
      "boundaries": [ 200, 400 ], 
      #符合边界条件的为一组,组名为$price的值, 不符合边界条件的为一组,组名为Other
      "default": "Other", 
      # 输出格式
      "output": {
        "count": { "$sum": 1 }, 
        "titles" : { "$push": "$name"} 
      }
    
  }
  }
# 结果
[{'_id': 200, 'count': 2, 'titles': ['t2', 't3']}, 
{'_id': 'Other', 'count': 3, 'titles': ['t1', 't4', 't5']}]



bucket =  {
    "$bucket": {
      # 分组
      "groupBy": "$price", 
      # 边界,t1:200<=x<300, t2:300<=x<500 
      "boundaries": [ 100, 300, 500 ], 
      # 符合边界条件的为一组,组名为$price的值, 不符合边界条件的为一组,组名为Other
      "default": "Other", 
      # 输出格式
      "output": {
        "count": { "$sum": 1 }, 
        "titles" : { "$push": "$name"} 
      }
    
  }
  }
# 结果
[{'_id': 100, 'count': 2, 'titles': ['t1', 't2']}, {'_id': 300, 'count': 2, 'titles': ['t3', 't4']}, {'_id': 'Other', 'count': 1, 'titles': ['t5']}]

使用方式二:搭配facet使用

https://docs.mongodb.com/v4.4/reference/operator/aggregation/bucket/#mongodb-pipeline-pipe.-bucket

2. $bucketauto

作用:根据指定的表达式将传入的文档分类到特定数量的组(称为bucket)中。Bucket边界将自动确定,以便将文档平均分配到指定数量的Bucket中。

https://docs.mongodb.com/v4.4/reference/operator/aggregation/bucketAuto/#mongodb-pipeline-pipe.-bucketAuto

3. $binarySize

作用:返回指定字符串或二进制数据的字节大小

例如:可以存储照片的大小

== index c ==

1. $ceil

作用:

数据格式如下

{ 
    "_id" : ObjectId("6110e53447e9154a103dc257"), 
    "name" : "t1", 
    "price" : -2.1
}
{ 
    "_id" : ObjectId("6110e53847e9154a103dc258"), 
    "name" : "t2", 
    "price" : 0.0
}
{ 
    "_id" : ObjectId("6110e53d47e9154a103dc259"), 
    "name" : "t3", 
    "price" : 3.2
}
{ 
    "_id" : ObjectId("6110e54247e9154a103dc25a"), 
    "name" : "t4", 
    "price" : -2.6
}
{ 
    "_id" : ObjectId("6110e54747e9154a103dc25b"), 
    "name" : "t5", 
    "price" : 1.0
}
// Newly added document
{ 
    "name" : "t6", 
    "price" : 3.6, 
    "_id" : ObjectId("6114be69605d7c02aa34eac8")
}

代码

match = {"$match":{"_id":{"$ne":""}}}
project =  {
    "$project": { "price":1,"name":1,"new_price":{"$ceil":"$price"}}
  }

ret= db["test"]["test"].aggregate([match,project])
# 结果:0还是0,负数返回比自己大的最近负整数,正数返回比自己大的最近正整数
[{'_id': ObjectId('6110e53447e9154a103dc257'), 'name': 't1', 'price': -2.1, 'new_price': -2.0}, 
{'_id': ObjectId('6110e53847e9154a103dc258'), 'name': 't2', 'price': 0.0, 'new_price': 0.0},
{'_id': ObjectId('6110e53d47e9154a103dc259'), 'name': 't3', 'price': 3.2, 'new_price': 4.0}, 
{'_id': ObjectId('6110e54247e9154a103dc25a'), 'name': 't4', 'price': -2.6, 'new_price': -2.0}, 
{'_id': ObjectId('6110e54747e9154a103dc25b'), 'name': 't5', 'price': 1.0, 'new_price': 1.0}, 
{'_id': ObjectId('6114be69605d7c02aa34eac8'), 'name': 't6', 'price': 3.6, 'new_price': 4.0}]
print(list(ret))

2. $cmp

作用:比较两个数的大小

数据格式如下:

{ 
    "_id" : ObjectId("6110e53447e9154a103dc257"), 
    "name" : "t1", 
    "price" : -2.1
}
{ 
    "_id" : ObjectId("6110e53847e9154a103dc258"), 
    "name" : "t2", 
    "price" : 0.0
}
{ 
    "_id" : ObjectId("6110e53d47e9154a103dc259"), 
    "name" : "t3", 
    "price" : 3.2
}
{ 
    "_id" : ObjectId("6110e54247e9154a103dc25a"), 
    "name" : "t4", 
    "price" : -2.6
}
{ 
    "_id" : ObjectId("6110e54747e9154a103dc25b"), 
    "name" : "t5", 
    "price" : 1.0
}
// Newly added document
{ 
    "name" : "t6", 
    "price" : 3.6, 
    "_id" : ObjectId("6114be69605d7c02aa34eac8")
}

需求:返回price比0大和比0小的数据

代码

match = {"$match":{"_id":{"$ne":""}}}
project =  {
    # 和0进行比较
    "$project": { "price":1,"name":1,"cmp_to_zero":{"$cmp":["$price",0]}}
  }

ret= db["test"]["test"].aggregate([match,project])
# 结果---cmp_to_zero 为0表示和0相等,-1表示小于0, 1表示大于0
[{'_id': ObjectId('6110e53447e9154a103dc257'), 'name': 't1', 'price': -2.1, 'cmp_to_zero': -1}, 
{'_id': ObjectId('6110e53847e9154a103dc258'), 'name': 't2', 'price': 0.0, 'cmp_to_zero': 0}, 
{'_id': ObjectId('6110e53d47e9154a103dc259'), 'name': 't3', 'price': 3.2, 'cmp_to_zero': 1}, 
{'_id': ObjectId('6110e54247e9154a103dc25a'), 'name': 't4', 'price': -2.6, 'cmp_to_zero': -1}, 
{'_id': ObjectId('6110e54747e9154a103dc25b'), 'name': 't5', 'price': 1.0, 'cmp_to_zero': 1}, 
{'_id': ObjectId('6114be69605d7c02aa34eac8'), 'name': 't6', 'price': 3.6, 'cmp_to_zero': 1}]

3. $concat

作用:字符串拼接

数据格式如下

{ 
    "_id" : ObjectId("6110e53447e9154a103dc257"), 
    "city" : "shantou", 
    "contry" : "china", 
    "province" : "guangdong", 
    "street" : "test_street"
}

拼接完整的地址

代码

match = {"$match":{"_id":{"$ne":""}}}
project =  {
    "$project": { "province":1,"contry":1,"city":1,"street":1,"detail":{"$concat":["$contry","/","$province","/","$city","/","$street"]}}
  }

ret= db["test"]["test"].aggregate([match,project])
# 结果
[
{'_id': ObjectId('6110e53447e9154a103dc257'), 'city': 'shantou', 'contry': 'china', 'province': 'guangdong', 'street': 'test_street', 'detail': 'china/guangdong/shantou/test_street'}
]

4.$concatArrays

作用:将多个列表拼接成一个列表

数据格式如下

{ 
    "_id" : ObjectId("6110e53447e9154a103dc257"), 
    "east_city" : [
        "shanghai", 
        "xiamen"
    ], 
    "north_city" : [
        "beijing", 
        "tianjin"
    ], 
    "south_city" : [
        "nanjing", 
        "shenzhen"
    ], 
    "west_city" : [
        "shengdu", 
        "wulumuqi"
    ], 
    "middle_city" : [

    ]
}

将东西南北城市列表合成一个列表返回

代码

match = {"$match":{"_id":{"$ne":""}}}
project =  {
    "$project": { "city_list":{"$concatArrays":["$east_city","$west_city","$north_city","$south_city","$middle_city"]}}
  }

ret= db["test"]["test"].aggregate([match,project])
# 结果
[{'_id': ObjectId('6110e53447e9154a103dc257'), 'city_list': ['shanghai', 'xiamen', 'shengdu', 'wulumuqi', 'beijing', 'tianjin', 'nanjing', 'shenzhen']}]
print(list(ret))

5.$cond

作用:返回if或者else里的返回值—条件匹配

数据格式如下:

// Newly added document
{ 
    "name" : "t1", 
    "price" : 0.0, 
    "_id" : ObjectId("6114d086605d7c02aa34ead9")
}
// Newly added document
{ 
    "name" : "t2", 
    "price" : 10.0, 
    "_id" : ObjectId("6114d08f605d7c02aa34eada")
}
// Newly added document
{ 
    "name" : "t3", 
    "price" : 30.0, 
    "_id" : ObjectId("6114d096605d7c02aa34eadb")
}
// Newly added document
{ 
    "name" : "t4", 
    "price" : 50.0, 
    "_id" : ObjectId("6114d09f605d7c02aa34eadc")
}
// Newly added document
{ 
    "name" : "t5", 
    "price" : 80.0, 
    "_id" : ObjectId("6114d0a4605d7c02aa34eadd")
}
// Newly added document
{ 
    "name" : "t6", 
    "price" : 100.0, 
    "_id" : ObjectId("6114d0aa605d7c02aa34eade")
}

price 小于100返回1000,大于50返回2000

代码

match = {"$match":{"_id":{"$ne":""}}}
project =  {
    # 满足 price和100的比较,
    "$project": {"_id":0,"price":1, "price_result":{"$cond":{"if":{"$gte":["$price",100]},"then":2000,"else":1000}}}
  }
ret= db["test"]["test"].aggregate([match,project])
# 
[{'price': 0.0, 'price_result': 1000}, 
{'price': 10.0, 'price_result': 1000}, 
{'price': 30.0, 'price_result': 1000}, 
{'price': 50.0, 'price_result': 1000}, 
{'price': 80.0, 'price_result': 1000}, 
{'price': 100.0, 'price_result': 2000}]
print(list(ret))

6. $convert

作用:字段类型转换

准备数据时候,发现mongo不能直接插入decimal类型的数据,只能插入decimal128类型,插入数据代码

from decimal import Decimal
from bson.decimal128 import Decimal128
data = {
    "name":"zhang",
    "age":10,
    "weight":75.5,
    "birthday": datetime.datetime.now(),
    "income": Decimal128(Decimal("8500.23")),
    "has_car":False
}
ret = db["test"]["test"].insert_one(data)
print(ret.inserted_id)

数据格式如下:

{ 
    "_id" : ObjectId("6114e20b661341258c35c08c"), 
    "name" : "zhang", 
    "age" : NumberInt(10), 
    "weight" : 75.5, 
    "birthday" : ISODate("2021-08-12T16:55:39.754+0000"), 
    "income" : NumberDecimal("8500.23"), 
    "has_car" : false
}

语法如下

{
   $convert:
      {
         input: <expression>, 输入的值
         to: <type expression>, 输出的值
         onError: <expression>,  // 出错操作 
         onNull: <expression>    // 空值操作
      }
}

代码

match = {"$match":{"_id":{"$ne":""}}}

project =  {
    "$project": {
        "name":{"$convert":{"input":"$name","to":"bool","onError":"error","onError":None}},
        "age":{"$convert":{"input":"$age","to":"string","onError":"error","onError":None}},
        "weight":{"$convert":{"input":"$weight","to":"int","onError":"error","onError":None}},
        "birthday":{"$convert":{"input":"$birthday","to":"bool","onError":"error","onError":None}},
        "income":{"$convert":{"input":"$income","to":"decimal","onError":"error","onError":None}},
        "has_car":{"$convert":{"input":"$has_car","to":"bool","onError":"error","onError":None}},
        # $toString 可替代
        "_id":{"$convert":{"input":"$_id","to":"string","onError":"error","onError":None}},
        }
  }


ret= db["test"]["test"].aggregate([match,project])
# 结果
[
{
'name': True, 
'age': '10',
'weight': 75, 
'birthday': True, 
'income': Decimal128('8500.23'), 
'has_car': False, 
'_id': '6114e20b661341258c35c08c'
}
]
print(list(ret))

具体见:https://docs.mongodb.com/v4.4/reference/operator/aggregation/convert/

=== index d===

1. $dateFromParts

作用:根据日期的组成属性构造并返回Date对象。

2. $dataToString

作用:将时间类型的数据转成指定格式的字符串

数据格式如下

{
    "_id" : ObjectId("6114e20b661341258c35c08c"),
    "name" : "zhang",
    "age" : 10,
    "weight" : 75.5,
    "birthday" : ISODate("2021-08-12T16:55:39.754Z"),
    "income" : NumberDecimal("8500.23"),
    "has_car" : false
}

birthday转换成不同格式的字符串

match_dict = {"$match": {"name": {"$ne": None}}}

project = {
    "$project": {
        "birthday": 1,
         # 年月日时分秒 
        "format_1": {"$dateToString": {"format": "%Y-%m-%d %H:%M:%S", "date": "$birthday"}},
         # 这一年的第几天
        "format_2": {"$dateToString": {"format": "%j", "date": "$birthday"}},
         # 这一天是周几,1-7
        "format_3": {"$dateToString": {"format": "%u", "date": "$birthday"}},
         # 这一年的第几个周
        "format_4": {"$dateToString": {"format": "%U", "date": "$birthday"}},

    }
}
ret = db["test"].aggregate([match_dict, project])
# 结果
[
{
'_id': ObjectId('6114e20b661341258c35c08c'), 'birthday': datetime.datetime(2021, 8, 12, 16, 55, 39, 754000), 
'format_1': '2021-08-12 16:55:39', 
'format_2': '224', 
'format_3': '4', 
'format_4': '32'
}
]
print(list(ret))

== index e ==

1. $exp

作用:计算e的n次幂
https://docs.mongodb.com/v4.4/reference/operator/aggregation/exp/#mongodb-expression-exp.-exp

2. $eq

作用:判断两个值是否相等

数据格式如下

{
    "_id" : ObjectId("6114e20b661341258c35c08c"),
    "name" : "zhang",
    "age" : 10,
    "weight" : 75.5,
    "birthday" : ISODate("2021-08-12T16:55:39.754Z"),
    "income" : NumberDecimal("8500.23"),
    "has_car" : false
}

代码

match_dict = {"$match": {"name": {"$ne": None}}}

project = {
    "$project": {
        "age": 1, "age_10_flag": {"$eq": ["$age", 10]}
    }
}
ret = db["test"].aggregate([match_dict, project])
# 结果
[
{'_id': ObjectId('6114e20b661341258c35c08c'), 'age': 10, 'age_10_flag': True}
]
print(list(ret))

== index f ==

1. $facet

作用:一次性执行多条aggragate语句:符合查询神器

2. $first

作用: 从分组结果中取第一条—必须和分组搭配使用

db = MongoClient(["39.108.63.181:37017","39.108.63.181:47017","39.108.63.181:57017"],replicaset="rs",connect=True)

group_dict = {"$group":{"_id":"$item","earlist_time":{"$first":"$date"}}}

ret = db["test"]["test"].aggregate([group_dict])
[
{'_id': 'jkl', 'earlist_time': datetime.datetime(2014, 2, 3, 9, 0)}, 
{'_id': 'xyz', 'earlist_time': datetime.datetime(2014, 2, 3, 9, 5)}, 
{'_id': 'abc', 'earlist_time': datetime.datetime(2014, 1, 1, 8, 0)}
]

== index f ==

1. graphLookup

作用:类似于自关联查询

数据

// Newly added document
{ 
    "name" : "china", 
    "_id" : ObjectId("61275f3f1d2b613cc23063ae")
}
// Newly added document
{ 
    "name" : "shanxi", 
    "up" : "china", 
    "_id" : ObjectId("61275f511d2b613cc23063af")
}
// Newly added document
{ 
    "name" : "xian", 
    "up" : "shanxi", 
    "_id" : ObjectId("61275f721d2b613cc23063b0")
}
// Newly added document
{ 
    "name" : "gaoxin", 
    "up" : "xian", 
    "_id" : ObjectId("61275f7c1d2b613cc23063b1")
}


== index R ==

1. $replaceRoot

作用:替换文档,提升文档显示级别,pymongo只识别$replaceWith(replaceRoot的别名)

数据:

// Newly added document
{ 
    "name" : "zhang", 
    "age" : 10.0, 
    "hobby" : {
        "school" : "study", 
        "home" : "game"
    }, 
    "_id" : ObjectId("61274f451d2b613cc230639e")
}

需要显示:

{"school" : "study",  "home" : "game"}

代码

ret = db["test"]["test_aps"].aggregate([{"$match":{"_id":{"$ne":""}}},{"$replaceWith":{"new_dict":"$hobby"}}])
# 结果
[{'new_dict': {'school': 'study', 'home': 'game'}}]

配合match,concat使用

ret = db["test"]["test_aps"].aggregate([
    {"$match":{"_id":{"$ne":""}}},
    {"$replaceWith":{"new_dict":"$hobby"}},
    {"$replaceWith":{"new_dict":{"concat_new_dict":{"$concat":["$new_dict.school","-100-100-","$new_dict.home"]}}}}
    
    ])
# 结果
[{'new_dict': {'concat_new_dict': 'study-100-100-game'}}]

2. $redact

作用:修订/校验,意思是对文档内容进行过滤,选择一些过滤或保留的信息,常常和$cond配合一起使用

数据:

// Newly added document
{ 
    "_id" : 1.0, 
    "title" : "123 Department Report", 
    "tags" : [
        "G", 
        "STLW"
    ], 
    "year" : 2014.0, 
    "subsections" : [
        {
            "subtitle" : "Section 1: Overview", 
            "tags" : [
                "SI", 
                "G"
            ], 
            "content" : "Section 1: This is the content of section 1."
        }, 
        {
            "subtitle" : "Section 2: Analysis", 
            "tags" : [
                "STLW"
            ], 
            "content" : "Section 2: This is the content of section 2."
        }, 
        {
            "subtitle" : "Section 3: Budgeting", 
            "tags" : [
                "TK"
            ], 
            "content" : {
                "text" : "Section 3: This is the content of section3.", 
                "tags" : [
                    "HCS"
                ]
            }
        }
    ]
}

如果想返回,subsections的tags包含”Tk”或”G”的文档,相当于返回部分文档
代码


发表回复

您的电子邮箱地址不会被公开。