MongoDB-2

MongoDB实战2


MapReduce

类似于MySQL的group by,用于做统计工作。Map函数遍历集合中的每条记录,并调用emit(key,value)生成键值对,再将同一key对应的值传递给Reduce函数处理。
使用db.runCommand或mapReduce命令执行。

格式

1
2
3
4
5
6
7
8
9
10
11
12
13
14
db.runCommand(
{mapreduce:<collection>,
map:<mapfunction>,
reduce:<reducefunction>,
[,query:<query filter object>]
[,sort:<sorts the input objects using this key.Useful for optimization,like sorting by the emit key for fewer reduces>]
[,limit:<number of objects to return from collection>]
[,out:<output collections>]
[,keeptemp:<true|false>]
[,finalize:<finalizefunction>]
[,scope:<object where fields go into javascript global scope>]
[,verbose:true]
}
);

参数名称 说明
mapreduce 指定数据集
map 指定映射函数(生成键值对序列,作为统计函数输入参数)
reduce 指定统计函数
query 目标过滤
sort 目标记录排序
limit 限制目标记录数量
out 统计结果存放集合,不指定则使用临时集合,客户端断开自动删除
keeptemp 是否保留临时集合
finalize 最终处理函数
scope 向map、reduce和finalize导入外部变量
verbose 显示详细的时间统计信息

MapReduce实例

准备数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
> for(var i=1;i<10;i++)db.students.insert({class:1,age:i});
WriteResult({ "nInserted" : 1 })
> for(var i=1;i<10;i++)db.students.insert({class:2,age:i});
WriteResult({ "nInserted" : 1 })
> db.students.find();
{ "_id" : ObjectId("57da8340279c0ecac9b96fe1"), "class" : 1, "age" : 1 }
{ "_id" : ObjectId("57da8340279c0ecac9b96fe2"), "class" : 1, "age" : 2 }
{ "_id" : ObjectId("57da8340279c0ecac9b96fe3"), "class" : 1, "age" : 3 }
{ "_id" : ObjectId("57da8340279c0ecac9b96fe4"), "class" : 1, "age" : 4 }
{ "_id" : ObjectId("57da8340279c0ecac9b96fe5"), "class" : 1, "age" : 5 }
{ "_id" : ObjectId("57da8340279c0ecac9b96fe6"), "class" : 1, "age" : 6 }
{ "_id" : ObjectId("57da8340279c0ecac9b96fe7"), "class" : 1, "age" : 7 }
{ "_id" : ObjectId("57da8340279c0ecac9b96fe8"), "class" : 1, "age" : 8 }
{ "_id" : ObjectId("57da8340279c0ecac9b96fe9"), "class" : 1, "age" : 9 }
{ "_id" : ObjectId("57da8990279c0ecac9b96fea"), "class" : 2, "age" : 1 }
{ "_id" : ObjectId("57da8990279c0ecac9b96feb"), "class" : 2, "age" : 2 }
{ "_id" : ObjectId("57da8990279c0ecac9b96fec"), "class" : 2, "age" : 3 }
{ "_id" : ObjectId("57da8990279c0ecac9b96fed"), "class" : 2, "age" : 4 }
{ "_id" : ObjectId("57da8990279c0ecac9b96fee"), "class" : 2, "age" : 5 }
{ "_id" : ObjectId("57da8990279c0ecac9b96fef"), "class" : 2, "age" : 6 }
{ "_id" : ObjectId("57da8990279c0ecac9b96ff0"), "class" : 2, "age" : 7 }
{ "_id" : ObjectId("57da8990279c0ecac9b96ff1"), "class" : 2, "age" : 8 }
{ "_id" : ObjectId("57da8990279c0ecac9b96ff2"), "class" : 2, "age" : 9 }

分别指定函数及运行结果

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
> m = function(){emit(this.class,1)}
> r = function(key,values){var x = 0;values.forEach(function(v){x += v});return x;}
> f = function(key,value){return {class:key,count:value};}
> re = db.runCommand({ mapreduce:"students", map:m, reduce:r, out:"students_res",finalize:f,query:{age:{$lt:5}}});
{
"result" : "students_res",
"timeMillis" : 148,
"counts" : {
"input" : 8,
"emit" : 8,
"reduce" : 2,
"output" : 2
},
"ok" : 1
}
> db.students_res.find();
{ "_id" : 1, "value" : { "class" : 1, "count" : 4 } }
{ "_id" : 2, "value" : { "class" : 2, "count" : 4 } }

索引

索引信息保存在system.indexes中,默认为_id创建索引,并且不可删除,创建索引可以后台执行。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
//创建索引
> db.students.ensureIndex({age:1}); //1升序,-1降序
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 1,
"numIndexesAfter" : 2,
"ok" : 1
}
//查看索引
> db.students.getIndexes();
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "test.students"
},
{
"v" : 1,
"key" : {
"age" : 1
},
"name" : "age_1",
"ns" : "test.students"
}
]
//后台执行创建索引
> db.students.ensureIndex({class:1},{background:true});
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 2,
"numIndexesAfter" : 3,
"ok" : 1
}

索引可以是任何类型的字段,甚至是文档

1
2
3
4
5
6
> db.factories.insert({name:"wwl",addr:{city:"BJ",state:"BJ"}});
//在addr列创建索引
> db.factories.ensureIndex({addr:1});
//查询一可以利用索引并匹配到文档;查询二因内嵌文档的字段顺序不一致,无法匹配到该文档(整个内嵌文档的等值匹配要求字段顺序完全一致)
查询一:db.factories.find({addr:{city:"BJ",state:"BJ"}});
查询二:db.factories.find({addr:{state:"BJ",city:"BJ"}});

组合索引

1
db.factories.ensureIndex({"addr.city":1,"addr.state":-1});

唯一索引

1
db.factories.ensureIndex({name:1},{unique:true});

强制索引

1
db.t5.find({age:{$lt:5}}).hint({name:1,age:1}).explain();

删除索引

1
2
3
4
//删除所有索引
db.factories.dropIndexes();
//指定删除
db.factories.dropIndex({addr:1});