MongoDB：将多个集合中的数据合并为一个。如何？

229

我如何在MongoDB中将多个集合中的数据合并到一个集合中？

我可以使用map-reduce吗？

我是个新手，我将不胜感激。

mongodb mongodb-query aggregation-framework

— 用户名
source

18

您是只想将不同集合中的文档复制到一个集合中，还是有其他计划？您能说明一下“合并”的具体含义吗？如果您只想通过mongo shell复制，那么 db.collection1.find().forEach(function(doc){db.collection2.save(doc)}); 就足够了。如果您不使用mongo shell，请说明您使用的驱动程序（java，php，...）。

— proximus

因此，我有一个集合（例如users），还有其他一些集合，例如地址簿集合、书籍列表集合等。我如何基于某个键（例如user_id）将这些集合合并为一个集合？

— user697697 2011年

相关：stackoverflow.com/q/2350495/435605

— AlikElzin-kilaka 2016年

147

尽管您无法实时执行此操作，但是可以使用MongoDB 1.8+ map/reduce中的“reduce”输出选项，多次运行map-reduce来将数据合并在一起（请参阅 http://www.mongodb.org/display/DOCS/MapReduce#MapReduce-Outputoptions ）。两个集合中都需要有某个可以用作_id的键。

例如，假设您有一个users集合和一个comments集合，并且您想要一个新集合，其中每个注释都包含一些用户人口统计信息。

假设该users集合具有以下字段：

_id
firstName
lastName
country
gender
age

然后该comments集合具有以下字段：

_id
userId
comment
created

您将执行以下map/reduce：

// The map/reduce callbacks are assigned further down in this script.
var mapUsers, mapComments, reduce;
// Start from an empty output collection. An explicit empty filter is passed
// because remove() without a query is rejected by later shell versions.
db.users_comments.remove({});

// setup sample data - wouldn't actually use this in production
db.users.remove({});
db.comments.remove({});
db.users.save({firstName:"Rich",lastName:"S",gender:"M",country:"CA",age:"18"});
db.users.save({firstName:"Rob",lastName:"M",gender:"M",country:"US",age:"25"});
db.users.save({firstName:"Sarah",lastName:"T",gender:"F",country:"US",age:"13"});
// Materialize the cursor so the users can be addressed by index below.
var users = db.users.find().toArray();
// Two comments for the first user, one for the second, none for the third.
db.comments.save({userId: users[0]._id, "comment": "Hey, what's up?", created: new ISODate()});
db.comments.save({userId: users[1]._id, "comment": "Not much", created: new ISODate()});
db.comments.save({userId: users[0]._id, "comment": "Cool", created: new ISODate()});
// end sample data setup

// Map function for the users collection: emits the user's demographic fields
// (country, gender, age) keyed by the user's own _id.
mapUsers = function() {
    emit(this._id, {
        country: this.country,
        gender: this.gender,
        age: this.age
    });
};
// Map function for the comments collection: emits one comment record keyed by
// the commenting user's id, so it shares a key with that user's mapUsers output.
mapComments = function() {
    var comment = {};
    comment.commentId = this._id;
    comment.comment = this.comment;
    comment.created = this.created;
    emit(this.userId, comment);
};
// Reduce function shared by both map-reduce passes.
//
//   k      - the emitted key (a user _id)
//   values - a mix of user-field objects, single comment objects (they have a
//            "comment" field) and previously reduced objects (they have a
//            "comments" array)
//
// Returns one object holding the user-level fields plus a "comments" array
// with every comment seen. The function may be fed its own output again, so
// an incoming "comments" array is appended to, never copied over, the array
// being built.
//
// Declared with `var` so the snippet also works when pasted on its own.
var reduce = function(k, values) {
    var result = {};
    // Names that must not be copied onto the top level of the result: the
    // fields of a single comment, and the "comments" array itself (copying it
    // would replace the array accumulated so far).
    var nestedFields = {
        "commentId": '',
        "comment": '',
        "created": '',
        "comments": ''
    };
    values.forEach(function(value) {
        var field;
        if ("comment" in value) {
            // A single freshly mapped comment.
            if (!("comments" in result)) {
                result.comments = [];
            }
            result.comments.push(value);
        } else if ("comments" in value) {
            // An already reduced value: append all of its comments.
            if (!("comments" in result)) {
                result.comments = [];
            }
            result.comments.push.apply(result.comments, value.comments);
        }
        for (field in value) {
            if (value.hasOwnProperty(field) && !nestedFields.hasOwnProperty(field)) {
                result[field] = value[field];
            }
        }
    });
    return result;
};
// Run both passes into the same output collection. With the "reduce" output
// option, results for a key that already exists in users_comments are combined
// with the stored document by calling `reduce` again.
db.users.mapReduce(mapUsers, reduce, {"out": {"reduce": "users_comments"}});
db.comments.mapReduce(mapComments, reduce, {"out": {"reduce": "users_comments"}});
db.users_comments.find().pretty(); // see the resulting collection

此时，您将得到一个名为users_comments的新集合，其中包含合并后的数据，您现在就可以使用它。这些经过reduce的集合都以您在map函数中emit的键作为_id，而所有的值都是value键下的子对象——这些值并不在reduce后文档的顶层。

这是一个简单的例子。您可以对任意多个集合重复此操作，不断扩充这个reduce后的集合。您还可以在此过程中对数据进行汇总和聚合。随着聚合与保留现有字段的逻辑变得更加复杂，您可能需要定义多个reduce函数。

您还将注意到，现在每个用户都有一个文档，该用户的所有注释都排列在一个数组中。如果我们要合并具有一对一关系而不是一对多关系的数据，那么它将是平坦的，您可以简单地使用如下的reduce函数：

// One-to-one merge: copies every own field of every value onto a single flat
// object. When the same field occurs in several values, the last one wins.
reduce = function(k, values) {
    var merged = {};
    var i, doc, name;
    for (i = 0; i < values.length; i++) {
        doc = values[i];
        for (name in doc) {
            if (!doc.hasOwnProperty(name)) {
                continue;
            }
            merged[name] = doc[name];
        }
    }
    return merged;
};

如果要展平users_comments集合，以便每个注释一个文档，请另外运行以下命令：

var map, reduce;

// Map function run over users_comments: for a document whose value holds a
// "comments" array, emits one record per comment, keyed by the comment id and
// carrying the owning user's id, country and age next to the comment fields.
map = function() {
    var doc = this;
    var name;
    // Debug aid: print every top-level field of the incoming document.
    for (name in doc) {
        print(name + ": " + doc[name]);
    }
    // Users without comments produce no output.
    if (!("comments" in doc.value)) {
        return;
    }
    doc.value.comments.forEach(function(entry) {
        emit(entry.commentId, {
            userId: doc._id,
            country: doc.value.country,
            age: doc.value.age,
            comment: entry.comment,
            created: entry.created
        });
    });
};

// Flat merge of all values for a key: own fields are copied onto one object,
// later values overriding earlier ones.
reduce = function(k, values) {
    var merged = {};
    var i, item, name;
    for (i = 0; i < values.length; i++) {
        item = values[i];
        for (name in item) {
            if (!item.hasOwnProperty(name)) {
                continue;
            }
            merged[name] = item[name];
        }
    }
    return merged;
};
// Write the one-document-per-comment result to comments_with_demographics.
db.users_comments.mapReduce(map, reduce, {"out": "comments_with_demographics"});

绝对不应实时执行此技术。它适合于cron作业或类似的定期更新合并数据的任务。您可能需要在新集合上运行ensureIndex，以确保针对它执行的查询能够快速运行（请记住，您的数据仍在value键中，因此，如果要按注释的created时间为comments_with_demographics建立索引，则应使用db.comments_with_demographics.ensureIndex({"value.created": 1});）。

— 马歇尔
source

1

我可能永远不会在生产软件中做到这一点，但这仍然是一种很酷的技巧。

— 戴夫·格里菲斯

3

谢谢，戴夫。在过去的3个月中，我使用该技术为生产中流量较高的站点生成了导出和报告表，没有出现问题。下面是描述了相似的使用技术的另一篇文章：tebros.com/2011/07/...

— rmarscher

1

感谢@rmarscher，您的额外详细信息确实帮助我更好地了解了所有内容。

— Benstr 2014年

5

我应该使用聚合管道和新的$ lookup操作的示例来更新此答案。在这里提到它，直到我可以整理出适当的文字。docs.mongodb.org/manual/reference/operator/aggregation/lookup

— rmarscher

1

仅供参考，对于那些想要快速了解其功能的人，这是users_comments第一段代码gist

— Nolan Amy

127

现在，MongoDB 3.2允许通过$lookup聚合阶段将多个集合中的数据合并为一个。举一个实际的例子，假设您有关于书籍的数据，分别存放在两个不同的集合中。

第一个集合称为books，具有以下数据：

{
    "isbn": "978-3-16-148410-0",
    "title": "Some cool book",
    "author": "John Doe"
}
{
    "isbn": "978-3-16-148999-9",
    "title": "Another awesome book",
    "author": "Jane Roe"
}

第二个集合称为books_selling_data，具有以下数据：

{
    "_id": ObjectId("56e31bcf76cdf52e541d9d26"),
    "isbn": "978-3-16-148410-0",
    "copies_sold": 12500
}
{
    "_id": ObjectId("56e31ce076cdf52e541d9d28"),
    "isbn": "978-3-16-148999-9",
    "copies_sold": 720050
}
{
    "_id": ObjectId("56e31ce076cdf52e541d9d29"),
    "isbn": "978-3-16-148999-9",
    "copies_sold": 1000
}

要合并两个集合，只需按以下方式使用$ lookup即可：

// Join each book with its rows from books_selling_data on the shared "isbn"
// value; the matching rows are attached as an array named "copies_sold".
db.books.aggregate([{
    $lookup: {
            from: "books_selling_data",
            localField: "isbn",
            foreignField: "isbn",
            as: "copies_sold"
        }
}])

聚合之后，返回的books文档将如下所示：

{
    "isbn": "978-3-16-148410-0",
    "title": "Some cool book",
    "author": "John Doe",
    "copies_sold": [
        {
            "_id": ObjectId("56e31bcf76cdf52e541d9d26"),
            "isbn": "978-3-16-148410-0",
            "copies_sold": 12500
        }
    ]
}
{
    "isbn": "978-3-16-148999-9",
    "title": "Another awesome book",
    "author": "Jane Roe",
    "copies_sold": [
        {
            "_id": ObjectId("56e31ce076cdf52e541d9d28"),
            "isbn": "978-3-16-148999-9",
            "copies_sold": 720050
        },
        {
            "_id": ObjectId("56e31ce076cdf52e541d9d29"),
            "isbn": "978-3-16-148999-9",
            "copies_sold": 1000
        }
    ]
}

请注意以下几点很重要：

“from”集合（本例中为books_selling_data）不能是分片集合。
如上面的示例，“ as”字段将是一个数组。
如果在各自的集合中不存在$ lookup阶段的 “ localField”和“ foreignField”选项，则出于匹配目的，它们将被视为null（$ lookup文档提供了一个完美的示例）。

因此，总结一下：如果您想合并两个集合——在本例中即得到一个扁平的copies_sold字段，其值为已售出的总册数——则需要做更多的工作，可能要借助一个中间集合，然后再用$out将结果输出到最终集合。

— 布鲁诺·克雷布斯
source

嗨，在这里，您能告诉我们什么是优化的数据管理方式：User，file.files和file.chunks是三个集合，我希望特定用户及其所有相关文件能够响应。{“ name”：“ batMan”，“ email”：“ bt@gmail.com”，“ files”：[{{file1}，{file2}，{file3}，....等等]）

— mfaisalhyder

可以在以下位置找到上述解决方案的官方文档示例：docs.mongodb.com/manual/reference/operator/aggregation/lookup

— Jakub Czaplicki

4

好吧，实际上我的回答已经有了指向官方文档的三个链接。但是还是要感谢您的贡献。@JakubCzaplicki

— Bruno Krebs

2

我可能会出现全脑故障（最有可能），但是$lookup“ localField”和“ foreignField”都不都应该等于“ isbn”吗？不是“ _id”和“ isbn”？

— 2016年

13

如果mongodb中没有大容量插入，我们将循环中的所有对象small_collection，并将它们一个接一个地插入big_collection：

// Copy every document of small_collection into big_collection, issuing one
// insert per document.
db.small_collection.find().forEach(function(obj){ 
   db.big_collection.insert(obj)
});

— Hieu Le
source

db.colleciton.insert（[{}，{}，{}]）插入接受数组。

— augurone

2

这对于小型集合来说很好用，但不要忘了迁移索引:)

— Sebastien Lorber

12

$ lookup的非常基本的示例。

// Join each user with its userinfo and userrole rows (both matched on userId),
// then turn the two joined arrays into plain sub-documents.
db.getCollection('users').aggregate([
    {
        // Matching userinfo rows are attached as the array "userInfoData".
        $lookup: {
            from: "userinfo",
            localField: "userId",
            foreignField: "userId",
            as: "userInfoData"
        }
    },
    {
        // Matching userrole rows are attached as the array "userRoleData".
        $lookup: {
            from: "userrole",
            localField: "userId",
            foreignField: "userId",
            as: "userRoleData"
        }
    },
    // preserveNullAndEmptyArrays keeps users for which a lookup found nothing.
    { $unwind: { path: "$userInfoData", preserveNullAndEmptyArrays: true }},
    { $unwind: { path: "$userRoleData", preserveNullAndEmptyArrays: true }}
])

这里用

 { $unwind: { path: "$userInfoData", preserveNullAndEmptyArrays: true }}, 
 { $unwind: { path: "$userRoleData", preserveNullAndEmptyArrays: true }}

代替

{ $unwind: "$userInfoData" },
{ $unwind: "$userRoleData" }

因为使用 { $unwind: "$userRoleData" } 时，如果$lookup没有找到匹配的记录，该文档会被丢弃，查询将返回空结果（0条记录）。

— 阿尼什·阿加瓦尔（Anish Agarwal）
source

11

在单个查询中，可以使用聚合和查找以“ SQL UNION”方式在MongoDB中进行联合。这是我测试过的可用于MongoDB 4.0的示例：

// Create employees data for testing the union.
db.getCollection('employees').insert({ name: "John", type: "employee", department: "sales" });
db.getCollection('employees').insert({ name: "Martha", type: "employee", department: "accounting" });
db.getCollection('employees').insert({ name: "Amy", type: "employee", department: "warehouse" });
db.getCollection('employees').insert({ name: "Mike", type: "employee", department: "warehouse"  });

// Create freelancers data for testing the union.
db.getCollection('freelancers').insert({ name: "Stephany", type: "freelancer", department: "accounting" });
db.getCollection('freelancers').insert({ name: "Martin", type: "freelancer", department: "sales" });
db.getCollection('freelancers').insert({ name: "Doug", type: "freelancer", department: "warehouse"  });
db.getCollection('freelancers').insert({ name: "Brenda", type: "freelancer", department: "sales"  });

// Here we do a union of the employees and freelancers using a single aggregation query.
// The collection the pipeline starts from only supplies one seed document; the
// rows that end up in the result come from the two $lookup stages.
db.getCollection('freelancers').aggregate( // 1. Use any collection containing at least one document.
  [
    { $limit: 1 }, // 2. Keep only one document of the collection.
    { $project: { _id: '$$REMOVE' } }, // 3. Remove everything from the document.

    // 4. Lookup collections to union together.
    //    Each sub-pipeline keeps only the 'sales' department rows.
    { $lookup: { from: 'employees', pipeline: [{ $match: { department: 'sales' } }], as: 'employees' } },
    { $lookup: { from: 'freelancers', pipeline: [{ $match: { department: 'sales' } }], as: 'freelancers' } },

    // 5. Union the collections together with a projection.
    { $project: { union: { $concatArrays: ["$employees", "$freelancers"] } } },

    // 6. Unwind and replace root so you end up with a result set.
    { $unwind: '$union' },
    { $replaceRoot: { newRoot: '$union' } }
  ]);

以下是其工作原理的说明：

对数据库中任意一个至少包含一个文档的集合调用aggregate。如果您无法保证数据库中始终存在非空集合，可以在数据库中创建一个“虚拟”集合来解决此问题，其中只包含一个空文档，专门用于执行联合查询。
使管道的第一阶段成为{ $limit: 1 }。这将删除集合中除第一个文档外的所有文档。
通过使用$project阶段来剥离剩余文档的所有字段：
```
{ $project: { _id: '$$REMOVE' } }
```

您的聚合现在只包含一个空文档。接下来为每个要合并的集合添加一个$lookup。您可以使用pipeline字段做特定的过滤，也可以不指定localField和foreignField（保持为null）来匹配整个集合。

{ $lookup: { from: 'collectionToUnion1', pipeline: [...], as: 'Collection1' } },
{ $lookup: { from: 'collectionToUnion2', pipeline: [...], as: 'Collection2' } },
{ $lookup: { from: 'collectionToUnion3', pipeline: [...], as: 'Collection3' } }

现在，您有一个包含单个文档的聚合，该文档包含3个数组，如下所示：

{
    Collection1: [...],
    Collection2: [...],
    Collection3: [...]
}

然后，您可以使用一个$project阶段以及$concatArrays聚合运算符将它们合并到一个数组中：

{
  "$project" :
  {
    "Union" : { $concatArrays: ["$Collection1", "$Collection2", "$Collection3"] }
  }
}

现在，您有了一个包含单个文档的聚合，其中包含一个包含集合并集的数组。剩下要做的是添加$unwind和$replaceRoot阶段，以将数组拆分为单独的文档：
```
{ $unwind: "$Union" },
{ $replaceRoot: { newRoot: "$Union" } }
```
Voilà。现在，您有一个结果集，其中包含要合并在一起的集合。然后，您可以添加更多阶段以对其进行进一步过滤，排序，应用skip（）和limit（）。您想要的几乎任何东西。

— Sboisse
source

查询失败，并显示消息“ $ projection需要至少一个输出字段”。

— abhishek_ganta19年

@abhishek如果您明白了，那是因为您试图在单个投影阶段将所有字段从单个文档中剥离。MongoDB不允许您这样做。要解决此问题，您需要进行2次连续投影，其中第一个投影将除去_id以外的所有内容，第二个投影将除去其余的_id。

— sboisse

@abhishek通过在使用'$$ REMOVE'变量的单个项目中替换$ project阶段，我进一步简化了查询。我还添加了一个具体示例，您可以将其直接复制并粘贴到查询测试器中以查看其工作原理。

— sboisse

@sboisse，此解决方案适用于较小的馆藏，但是，如果我要对较大的馆藏（100,000多个文档）执行此操作，则会遇到“ collectionToUnion1中的文档总大小超出最大文档大小”的问题。在文档中，建议将$ unwind直接放在$ lookup之后，以避免创建大型的中间文档。我尚未成功使用该方法修改此解决方案。您是否遇到过这个问题并且不得不使用该方法？链接到我引用的文档：[link]（docs.mongodb.com/manual/core/aggregation-pipeline-optimization/…）

— lucky7samson

不幸的是，@ lucky7samson我必须处理的数据量并不大。因此，我不必面对您所指的问题。就我而言，我可以在将记录与其余记录合并之前对集合应用筛选以进行查找，因此要合并的数据量非常小。

— sboisse

9

对聚合中的多个集合使用多个$ lookup

查询：

// Fetch service location "36728" and join it with its orders, time-window
// types and service-time type in a single pipeline.
db.getCollection('servicelocations').aggregate([
  {
    // Restrict the pipeline to the requested service location id(s).
    $match: {
      serviceLocationId: {
        $in: ["36728"]
      }
    }
  },
  {
    // Orders that share this location's serviceLocationId.
    $lookup: {
      from: "orders",
      localField: "serviceLocationId",
      foreignField: "serviceLocationId",
      as: "orders"
    }
  },
  {
    // The joined rows are stored under "timeWindow", the same top-level name
    // that the local field path starts with.
    $lookup: {
      from: "timewindowtypes",
      localField: "timeWindow.timeWindowTypeId",
      foreignField: "timeWindowTypeId",
      as: "timeWindow"
    }
  },
  {
    $lookup: {
      from: "servicetimetypes",
      localField: "serviceTimeTypeId",
      foreignField: "serviceTimeTypeId",
      as: "serviceTime"
    }
  },
  {
    // NOTE(review): the sample result shown for this query still has "orders"
    // as an array, which does not match unwinding it here - confirm which is intended.
    $unwind: "$orders"
  },
  {
    $unwind: "$serviceTime"
  },
  {
    $limit: 14
  }
])

结果：

{
    "_id" : ObjectId("59c3ac4bb7799c90ebb3279b"),
    "serviceLocationId" : "36728",
    "regionId" : 1.0,
    "zoneId" : "DXBZONE1",
    "description" : "AL HALLAB REST EMIRATES MALL",
    "locationPriority" : 1.0,
    "accountTypeId" : 1.0,
    "locationType" : "SERVICELOCATION",
    "location" : {
        "makani" : "",
        "lat" : 25.119035,
        "lng" : 55.198694
    },
    "deliveryDays" : "MTWRFSU",
    "timeWindow" : [ 
        {
            "_id" : ObjectId("59c3b0a3b7799c90ebb32cde"),
            "timeWindowTypeId" : "1",
            "Description" : "MORNING",
            "timeWindow" : {
                "openTime" : "06:00",
                "closeTime" : "08:00"
            },
            "accountId" : 1.0
        }, 
        {
            "_id" : ObjectId("59c3b0a3b7799c90ebb32cdf"),
            "timeWindowTypeId" : "1",
            "Description" : "MORNING",
            "timeWindow" : {
                "openTime" : "09:00",
                "closeTime" : "10:00"
            },
            "accountId" : 1.0
        }, 
        {
            "_id" : ObjectId("59c3b0a3b7799c90ebb32ce0"),
            "timeWindowTypeId" : "1",
            "Description" : "MORNING",
            "timeWindow" : {
                "openTime" : "10:30",
                "closeTime" : "11:30"
            },
            "accountId" : 1.0
        }
    ],
    "address1" : "",
    "address2" : "",
    "phone" : "",
    "city" : "",
    "county" : "",
    "state" : "",
    "country" : "",
    "zipcode" : "",
    "imageUrl" : "",
    "contact" : {
        "name" : "",
        "email" : ""
    },
    "status" : "ACTIVE",
    "createdBy" : "",
    "updatedBy" : "",
    "updateDate" : "",
    "accountId" : 1.0,
    "serviceTimeTypeId" : "1",
    "orders" : [ 
        {
            "_id" : ObjectId("59c3b291f251c77f15790f92"),
            "orderId" : "AQ18O1704264",
            "serviceLocationId" : "36728",
            "orderNo" : "AQ18O1704264",
            "orderDate" : "18-Sep-17",
            "description" : "AQ18O1704264",
            "serviceType" : "Delivery",
            "orderSource" : "Import",
            "takenBy" : "KARIM",
            "plannedDeliveryDate" : ISODate("2017-08-26T00:00:00.000Z"),
            "plannedDeliveryTime" : "",
            "actualDeliveryDate" : "",
            "actualDeliveryTime" : "",
            "deliveredBy" : "",
            "size1" : 296.0,
            "size2" : 3573.355,
            "size3" : 240.811,
            "jobPriority" : 1.0,
            "cancelReason" : "",
            "cancelDate" : "",
            "cancelBy" : "",
            "reasonCode" : "",
            "reasonText" : "",
            "status" : "",
            "lineItems" : [ 
                {
                    "ItemId" : "BNWB020",
                    "size1" : 15.0,
                    "size2" : 78.6,
                    "size3" : 6.0
                }, 
                {
                    "ItemId" : "BNWB021",
                    "size1" : 20.0,
                    "size2" : 252.0,
                    "size3" : 11.538
                }, 
                {
                    "ItemId" : "BNWB023",
                    "size1" : 15.0,
                    "size2" : 285.0,
                    "size3" : 16.071
                }, 
                {
                    "ItemId" : "CPMW112",
                    "size1" : 3.0,
                    "size2" : 25.38,
                    "size3" : 1.731
                }, 
                {
                    "ItemId" : "MMGW001",
                    "size1" : 25.0,
                    "size2" : 464.375,
                    "size3" : 46.875
                }, 
                {
                    "ItemId" : "MMNB218",
                    "size1" : 50.0,
                    "size2" : 920.0,
                    "size3" : 60.0
                }, 
                {
                    "ItemId" : "MMNB219",
                    "size1" : 50.0,
                    "size2" : 630.0,
                    "size3" : 40.0
                }, 
                {
                    "ItemId" : "MMNB220",
                    "size1" : 50.0,
                    "size2" : 416.0,
                    "size3" : 28.846
                }, 
                {
                    "ItemId" : "MMNB270",
                    "size1" : 50.0,
                    "size2" : 262.0,
                    "size3" : 20.0
                }, 
                {
                    "ItemId" : "MMNB302",
                    "size1" : 15.0,
                    "size2" : 195.0,
                    "size3" : 6.0
                }, 
                {
                    "ItemId" : "MMNB373",
                    "size1" : 3.0,
                    "size2" : 45.0,
                    "size3" : 3.75
                }
            ],
            "accountId" : 1.0
        }, 
        {
            "_id" : ObjectId("59c3b291f251c77f15790f9d"),
            "orderId" : "AQ137O1701240",
            "serviceLocationId" : "36728",
            "orderNo" : "AQ137O1701240",
            "orderDate" : "18-Sep-17",
            "description" : "AQ137O1701240",
            "serviceType" : "Delivery",
            "orderSource" : "Import",
            "takenBy" : "KARIM",
            "plannedDeliveryDate" : ISODate("2017-08-26T00:00:00.000Z"),
            "plannedDeliveryTime" : "",
            "actualDeliveryDate" : "",
            "actualDeliveryTime" : "",
            "deliveredBy" : "",
            "size1" : 28.0,
            "size2" : 520.11,
            "size3" : 52.5,
            "jobPriority" : 1.0,
            "cancelReason" : "",
            "cancelDate" : "",
            "cancelBy" : "",
            "reasonCode" : "",
            "reasonText" : "",
            "status" : "",
            "lineItems" : [ 
                {
                    "ItemId" : "MMGW001",
                    "size1" : 25.0,
                    "size2" : 464.38,
                    "size3" : 46.875
                }, 
                {
                    "ItemId" : "MMGW001-F1",
                    "size1" : 3.0,
                    "size2" : 55.73,
                    "size3" : 5.625
                }
            ],
            "accountId" : 1.0
        }, 
        {
            "_id" : ObjectId("59c3b291f251c77f15790fd8"),
            "orderId" : "AQ110O1705036",
            "serviceLocationId" : "36728",
            "orderNo" : "AQ110O1705036",
            "orderDate" : "18-Sep-17",
            "description" : "AQ110O1705036",
            "serviceType" : "Delivery",
            "orderSource" : "Import",
            "takenBy" : "KARIM",
            "plannedDeliveryDate" : ISODate("2017-08-26T00:00:00.000Z"),
            "plannedDeliveryTime" : "",
            "actualDeliveryDate" : "",
            "actualDeliveryTime" : "",
            "deliveredBy" : "",
            "size1" : 60.0,
            "size2" : 1046.0,
            "size3" : 68.0,
            "jobPriority" : 1.0,
            "cancelReason" : "",
            "cancelDate" : "",
            "cancelBy" : "",
            "reasonCode" : "",
            "reasonText" : "",
            "status" : "",
            "lineItems" : [ 
                {
                    "ItemId" : "MMNB218",
                    "size1" : 50.0,
                    "size2" : 920.0,
                    "size3" : 60.0
                }, 
                {
                    "ItemId" : "MMNB219",
                    "size1" : 10.0,
                    "size2" : 126.0,
                    "size3" : 8.0
                }
            ],
            "accountId" : 1.0
        }
    ],
    "serviceTime" : {
        "_id" : ObjectId("59c3b07cb7799c90ebb32cdc"),
        "serviceTimeTypeId" : "1",
        "serviceTimeType" : "nohelper",
        "description" : "",
        "fixedTime" : 30.0,
        "variableTime" : 0.0,
        "accountId" : 1.0
    }
}

— 卡地基亚
source

1

Mongorestore具有此功能，可以将其附加到数据库中已有的任何内容之上，因此可以将此行为用于合并两个集合：

mongodump集合1
collection2.rename（collection1）
mongorestore

尚未尝试过，但它可能比map / reduce方法执行得更快。

— uli里
source

1

从Mongo 4.4开始，我们可以通过将新的$unionWith聚合阶段与$group的新$accumulator运算符结合使用，在聚合管道内实现这种联接：

// > db.users.find()
//   [{ user: 1, name: "x" }, { user: 2, name: "y" }]
// > db.books.find()
//   [{ user: 1, book: "a" }, { user: 1, book: "b" }, { user: 2, book: "c" }]
// > db.movies.find()
//   [{ user: 1, movie: "g" }, { user: 2, movie: "h" }, { user: 2, movie: "i" }]
// Build one document per user holding the user's name plus the lists of that
// user's books and movies, all inside a single aggregation.
db.users.aggregate([
  // Append every document of books and movies to the stream of user documents.
  { $unionWith: "books"  },
  { $unionWith: "movies" },
  // Group the mixed stream by the shared "user" field.
  { $group: {
    _id: "$user",
    user: {
      $accumulator: {
        // Values handed to accumulate() for each grouped document; a field that
        // is absent from a document is simply falsy there.
        accumulateArgs: ["$name", "$book", "$movie"],
        // Initial state of a group.
        init: function() { return { books: [], movies: [] } },
        // Fold one document into the group's state.
        accumulate: function(user, name, book, movie) {
          if (name) user.name = name;
          if (book) user.books.push(book);
          if (movie) user.movies.push(movie);
          return user;
        },
        // Combine two partial states. concat() returns a new array rather than
        // modifying its receiver, so the result has to be assigned back;
        // otherwise everything collected in userV2 would be dropped.
        merge: function(userV1, userV2) {
          if (userV2.name) userV1.name = userV2.name;
          userV1.books = userV1.books.concat(userV2.books);
          userV1.movies = userV1.movies.concat(userV2.movies);
          return userV1;
        },
        lang: "js"
      }
    }
  }}
])
// { _id: 1, user: { books: ["a", "b"], movies: ["g"], name: "x" } }
// { _id: 2, user: { books: ["c"], movies: ["h", "i"], name: "y" } }

$unionWith将来自给定集合的记录合并到聚合管道中已存在的文档中。在两个联合阶段之后，因此我们将所有用户，书籍和电影记录都纳入管道中。
然后$group，我们$user使用$accumulator运算符记录和累积项目，以便在文档分组后进行自定义累积：
- 我们要累积的字段用定义accumulateArgs。
- init 定义将元素分组时将累积的状态。
- 该accumulate功能允许对记录进行分组以执行自定义操作，以建立累积状态。例如，如果要分组的项目具有book定义的字段，那么我们将更新books状态的一部分。
- merge用于合并两个内部状态。它仅用于在分片群集上运行的聚合或操作超过内存限制时使用。

— Xavier Guihot
source

是否可以为以下项目检索类似的输出：4.2.6版本

— Nixit Patel

0

是的，您可以：采用我今天编写的此实用程序功能：

// Copies every document of one or more source collections into a target
// collection, one insert per document.
//
//   arguments[0]    - name of the target collection
//   arguments[1..n] - names of the source collections
//
// Documents are inserted as-is, including their _id.
function shangMergeCol() {
  // Kept local so the helper does not leak variables into the shell session
  // (assigning to undeclared names also throws in strict mode).
  var tcol = db.getCollection(arguments[0]);
  var scol;
  var i;

  function copyIntoTarget(d) {
    tcol.insert(d);
  }

  for (i = 1; i < arguments.length; i++) {
    scol = db.getCollection(arguments[i]);
    scol.find().forEach(copyIntoTarget);
  }
}

您可以将任意数量的集合传递给此函数，第一个将成为目标集合。其余所有集合都是要转移到目标集合的源。

— 尚阿布
source

-1

代码段。致谢：参考了Stack Overflow上的多个帖子，包括本帖。

 // Reset both sample collections so the snippet can be re-run from scratch.
 db.cust.drop();
 db.zip.drop();
 // Five customers, each referring to a zip row through zip_id.
 db.cust.insert({cust_id:1, zip_id: 101});
 db.cust.insert({cust_id:2, zip_id: 101});
 db.cust.insert({cust_id:3, zip_id: 101});
 db.cust.insert({cust_id:4, zip_id: 102});
 db.cust.insert({cust_id:5, zip_id: 102});

 // Three zip rows; zip_id 103 is not used by any customer inserted above.
 db.zip.insert({zip_id:101, zip_cd:'AAA'});
 db.zip.insert({zip_id:102, zip_cd:'BBB'});
 db.zip.insert({zip_id:103, zip_cd:'CCC'});

// Map function for the cust collection: emits the customer's id keyed by the
// zip_id the customer refers to. Declared with `var` instead of being created
// as an implicit global.
var mapCust = function() {
    var values = {
        cust_id: this.cust_id
    };
    emit(this.zip_id, values);
};

// Map function for the zip collection: emits the zip code keyed by zip_id, the
// same key mapCust emits under. Declared with `var` instead of being created
// as an implicit global.
var mapZip = function() {
    var values = {
        zip_cd: this.zip_cd
    };
    emit(this.zip_id, values);
};

// Reduce function shared by the cust and zip passes.
//
//   k      - the emitted key (a zip_id)
//   values - a mix of customer records ({cust_id}), zip records ({zip_cd}) and
//            previously reduced objects (they carry a "cust_ids" array)
//
// Returns one object with the zip-level fields plus a "cust_ids" array that
// holds every customer record seen. Because the function can be fed its own
// output again, an incoming "cust_ids" array is appended to the array being
// built rather than replacing it.
var reduceCustZip = function(k, values) {
    var result = {};
    values.forEach(function(value) {
        var field;
        if ("cust_id" in value) {
            // A freshly mapped customer record.
            if (!("cust_ids" in result)) {
                result.cust_ids = [];
            }
            result.cust_ids.push(value);
            return;
        }
        for (field in value) {
            if (!value.hasOwnProperty(field)) {
                continue;
            }
            if (field === "cust_ids") {
                // An already reduced value: append into a result-owned array so
                // neither earlier customers nor the input object are clobbered.
                if (!("cust_ids" in result)) {
                    result.cust_ids = [];
                }
                result.cust_ids.push.apply(result.cust_ids, value.cust_ids);
            } else {
                result[field] = value[field];
            }
        }
    });
    return result;
};


// Rebuild cust_zip from scratch: both passes write to it with the "reduce"
// output option, so customer and zip values for the same zip_id are combined
// through reduceCustZip.
db.cust_zip.drop();
db.cust.mapReduce(mapCust, reduceCustZip, {"out": {"reduce": "cust_zip"}});
db.zip.mapReduce(mapZip, reduceCustZip, {"out": {"reduce": "cust_zip"}});
db.cust_zip.find();


// Map function run over cust_zip: for a document whose value holds a
// "cust_ids" array, emits one record per customer, keyed by cust_id and
// carrying the zip_id (the document's _id) and the zip code. Documents without
// customers emit nothing. Declared with `var` instead of being created as an
// implicit global.
var mapCZ = function() {
    var that = this;
    if (!("cust_ids" in this.value)) {
        return;
    }
    this.value.cust_ids.forEach(function(value) {
        emit(value.cust_id, {
            zip_id: that._id,
            zip_cd: that.value.zip_cd
        });
    });
};

// Flat merge of all values emitted for one cust_id: own fields are copied onto
// a single object, later values overriding earlier ones. Declared with `var`
// instead of being created as an implicit global.
var reduceCZ = function(k, values) {
    var result = {};
    values.forEach(function(value) {
        var field;
        for (field in value) {
            if (value.hasOwnProperty(field)) {
                result[field] = value[field];
            }
        }
    });
    return result;
};
// Rebuild cust_zip_joined: one document per customer with the joined zip data
// nested under "value".
db.cust_zip_joined.drop();
db.cust_zip.mapReduce(mapCZ, reduceCZ, {"out": "cust_zip_joined"}); 
db.cust_zip_joined.find().pretty();


// Flattens a map-reduce output collection in place: every document that has a
// "value" field is replaced by the contents of that field (matched by _id), so
// the fields end up at the top level.
//
//   dbName         - name of the database holding the collection
//   collectionName - name of the collection to flatten
//
// Replacements are sent in unordered bulk batches of 1000 operations.
var flattenMRCollection=function(dbName,collectionName) {
    var collection=db.getSiblingDB(dbName)[collectionName];

    var i=0;
    // Number of operations queued in the current batch.
    var pending=0;
    var bulk=collection.initializeUnorderedBulkOp();
    // NOTE(review): 16 looks like the legacy shell's no-timeout cursor flag - confirm.
    collection.find({ value: { $exists: true } }).addOption(16).forEach(function(result) {
        print((++i));

        bulk.find({_id: result._id}).replaceOne(result.value);
        pending++;

        if(pending===1000)
        {
            print("Executing bulk...");
            bulk.execute();
            bulk=collection.initializeUnorderedBulkOp();
            pending=0;
        }
    });
    // Executing a batch with no operations raises an error, so the final flush
    // only runs when something is still queued (this also covers an empty
    // collection and counts that are exact multiples of the batch size).
    if(pending>0)
    {
        bulk.execute();
    }
};


// Flatten the joined output in database "mydb", then display the result.
flattenMRCollection("mydb","cust_zip_joined");
db.cust_zip_joined.find().pretty();

— 维普尔·梅塔（Vipul Mehta）
source

-2

您必须在应用程序层中执行此操作。如果您使用的是ORM，则可以使用批注（或类似方法）提取其他集合中存在的引用。我只使用过Morphia，并且@Reference注释在查询时获取引用的实体，因此我可以避免自己在代码中这样做。

— 龙虾1234
source