Cheat Sheet - MongoDB

MongoDB is a popular NoSQL database that allows unauthenticated access by default.

Authentication

Regardless of the user’s authentication database, Mongo always stores user information in admin.

MongoDB stores all user information, including name, password, and the user’s authentication database, in the system.users collection in the admin database.

See centralized-user-data and system-users-collection.

When you create a user and grant that user access to a single database (aka their authentication database) then that information can only be stored in the admin database.

So, it’s not really a question of “best practice”; storing user details in admin is MongoDB’s choice, as implemented by their user management commands.

Update in response to this comment:

Ok, so the users are always located in the admin db, but I may also add “duplicates” to the other dbs? Maybe the question should be whether there is any advantage in adding users to the other “non admin” dbs?

If you intend to have a single user with access to multiple databases then create a single user with roles in each of those databases rather than creating that user multiple times i.e. once in each of those databases. For example:

    use admin;
    db.createUser({user:'userName', pwd:'passwordValue', roles:[
        {role:'readWrite', db:'DatabaseA'},
        {role:'readWrite', db:'DatabaseB'}
    ]});

Create initial admin user

# start mongo and switch to admin db
mongos> use admin
switched to db admin

# create admin user
mongos> db.createUser({
  user: "admin",
  pwd: "<long secure password>",
  roles:[ "userAdminAnyDatabase", "dbAdminAnyDatabase", "readWriteAnyDatabase"] })

Successfully added user: {
	"user" : "admin",
	"roles" : [
		"userAdminAnyDatabase",
		"dbAdminAnyDatabase",
		"readWriteAnyDatabase"
	]
}

# connect with new admin user
$ mongo -u admin -p <long secure password>  --authenticationDatabase admin

# create db
# empty dbs are not persisted so create a collection to make it stay
mongos> use my_app_db
switched to db my_app_db
mongos> db.createCollection("deleteme")
{ "ok" : 1 }
mongos> show collections
deleteme  

# Finally create your app user account app_svc_name

Sharded Cluster with enforced authentication

Create:

  • a cluster-wide admin user
  • a replica set specific admin user

Cluster-wide Admin

admin = db.getSiblingDB("admin");
admin.createUser(
  {
    user: "cluster_admin",
    pwd: "super_complex_password",
    roles: [
      { role: "clusterAdmin", db: "admin" },
      { role: "userAdminAnyDatabase", db: "admin" }
    ]
  }
);

Replica Set Admin (a.k.a shard local)

admin = db.getSiblingDB("admin");
admin.createUser(
  {
    user: "shard_local_admin",
    pwd: "super_complex_password",
    roles: [
      { role: "clusterAdmin", db: "admin" },
      { role: "userAdminAnyDatabase", db: "admin" }
    ]
  }
);

Enable authentication in the mongos configuration

security:
  keyFile: /etc/mongodb/keys/mongodb-internal-cluster-auth.key

Connect to all replica set member nodes

mongo --host rsConfig/mongo-node1:27019,mongo-node2:27019,mongo-node3:27019
mongo --host rs1/mongo-node1:27021,mongo-node2:27021,mongo-node3:27021
mongo --host rs2/mongo-node1:27022,mongo-node2:27022,mongo-node3:27022

Authenticate and check that the admin users exist

db.auth( "cluster_admin", "super_complex_password" )
db.auth( "shard_local_admin", "super_complex_password" )

use admin
db.getUsers()

rs1:PRIMARY> db.getUsers()
[
    {
        "_id" : "admin.shard_local_admin",
        "user" : "shard_local_admin",
        "db" : "admin",
        "roles" : [
             {
                "role" : "clusterAdmin",
                "db" : "admin"
             },
             {
                "role" : "userAdminAnyDatabase",
                "db" : "admin"
             }
        ]
    }
]

Commands

Log in

mongo -u <username> -p <password> --authenticationDatabase <dbname>

Rename collections

db.bookings.renameCollection("bookings.old")

Copy Collection

mongoexport -d db_name -c src_collection | mongoimport -d db_name -c dst_collection --drop
# or
db.myoriginal.aggregate([ { $out: "mycopy" } ])
# or
db.myoriginal_collection.aggregate([{$match: { id: "id" }},{ $out: "myoriginal_duplicate_collection" } ])

Check replication set status

// connect to multiple instances and automatically fail over
// if one becomes unavailable
mongo --host rs1/node1:27017,node2:27017,node3:27017

// get full replication set status
rs.status()

// list replication set members
rs.conf()['members']

Backup/Restore

# restore db from backup
mongorestore --db app_dev --dir dump/app -u app_svc_name -p tG9agv3rnTTAtFiexX5dc
// use admin db
use admin

// create auth user and test credentials
db.createUser({
     user: "app_svc_name",
     pwd: "tG9agv3rnTTAtFiexX5dc",
     roles: [{ role: "dbOwner", db: "app_svc_name" }] })

// authenticate
db.auth("app_svc_name", "tG9agv3rnTTAtFiexX5dc")

// Logout
db.logout()

Documents

// insert single document
db.<collectionName>.insert({field1: "string", field2: 1337});

// insert multiple documents
db.<collectionName>.insert([{field1: "value1"}, {field1: "value2"}]);
db.<collectionName>.insertMany([{field1: "value1"}, {field1: "value2"}]);

// matching document will be updated; if no document matching the ID is found, a new document is created
db.<collectionName>.save({"_id": new ObjectId("jhgsdjhgdsf"), field1: "value", field2: "value"});

// retrieve all records
db.<collectionName>.find();

// retrieve limited number of records; Following command will print 10 results
db.<collectionName>.find().limit(10);

// retrieve records by id
db.<collectionName>.find({"_id": ObjectId("someid")});
db.<collectionName>.find({"title":"Treasure Island"});
db.<collectionName>.find({"feedObj.feedId":"InstanceID"}).pretty();

// find by wildcard
db.<collectionName>.find({"name": /.*m.*/})

// find by in
db.<collectionName>.find({'<fieldname>':{$in:['1','2']}});

// find within a date range
db.<collectionName>.find({ $and: [
  {"<datefieldname>" : {$gte: ISODate("2018-07-01T00:00:00.000Z")}},
  {"<datefieldname>": {$lte: ISODate("2018-08-01T00:00:00.000Z")}} ]
});

// find with conditional operators
db.<collectionName>.find({$and: [
  {"_t":{$all:["<field1name>","<field1value>"]}},
  {"<field2name>":"<field2value>"} ]
});

// "projection"
// retrieve values of specific collection attributes by passing an object having
// attribute names assigned to 1 (true) or 0 (false) based on whether that
// attribute value needs to be included in the output or not, respectively
db.<collectionName>.find({"_id": ObjectId("someid")}, {field1: 1, field2: 1});
db.<collectionName>.find({"_id": ObjectId("someid")}, {field1: 0}); // Exclude field1

// returns the results sorted by the name field in ascending order (1).
// Use -1 for descending order
db.<collectionName>.find().sort( { name: 1 } );

// document count
db.<collectionName>.count();

// update specific fields of a single document that matches the query condition
// db.<collectionName>.update(<query>, <update>)
db.<collectionName>.update({title : "Treasure Island"}, {$set : {category :"Adventure Fiction"}});

// remove certain fields of a single document that matches the query condition
// db.collection.update(<query>, <update>)
db.<collectionName>.update({title : "Treasure Island"}, {$unset : {category:""}})

// remove certain fields of all documents that match the query condition
// db.collection.update(<query>, <update>, {multi:true} )
db.<collectionName>.update({category : "Fiction"}, {$unset : {category:""}}, {multi:true})

// delete a single document that matches the query condition
// db.collection.remove(<query>, {justOne:true})
db.<collectionName>.remove({title :"Treasure Island"}, {justOne:true})

// delete all documents matching a query condition
// db.collection.remove(<query>)
db.<collectionName>.remove({"category" :"Fiction"})

// delete all documents in a collection
db.<collectionName>.remove({})

Admin commands

// get the collection statistics
db.<collectionName>.stats()
db.printCollectionStats()

// latency statistics for read and write operations, including the average time taken
// for reads and writes and the related number of operations performed
db.<collectionName>.latencyStats()

// get collection size for data and indexes
db.<collectionName>.dataSize() // size of the collection
db.<collectionName>.storageSize() // total size of document stored in the collection
db.<collectionName>.totalSize() // total size in bytes for both collection data and indexes
db.<collectionName>.totalIndexSize() // total size of all indexes in the collection

Facts

TLS 1.2 for Mongo Routers

To protect your application’s database connection enable TLS on the mongo routers as follows. Note that your mongo driver configuration needs to trust the CA certificate and enable transport encryption with ssl=true.

# See: https://docs.mongodb.com/manual/tutorial/configure-ssl/#procedures-using-net-tls-settings
#
# applies to versions < 4.2
net:
  port: 27017
  bindIp: 0.0.0.0
  ssl:
    mode: allowSSL
    PEMKeyFile: /etc/ssl/mongo-router.pem
    CAFile: /etc/ssl/mongo-ca.pem
    disabledProtocols: TLS1_0,TLS1_1
    allowConnectionsWithoutCertificates: true

# applies to versions ≥ 4.2
net:
  port: 27017
  bindIp: 0.0.0.0
  tls:
    mode: allowTLS
    certificateKeyFile: /etc/ssl/mongo-router.pem
    CAFile: /etc/ssl/mongo-ca.pem
    disabledProtocols: TLS1_0,TLS1_1
    allowConnectionsWithoutCertificates: true

Rolling Update/Cluster Patching

Maintenance (startup in reverse order):

// 1) on primary:
// Instructs the primary of the replica set to become a secondary. After the primary steps down, eligible secondaries will hold an election for primary.
rs.stepDown()
// 2) stop config service
db.getSiblingDB('admin').shutdownServer()
// 3) stop shard services (primary, secondary)
db.getSiblingDB('admin').shutdownServer()
// 4) patch app/OS
// 5) reboot

Replication Concept

  1. write operations go to the primary node
  2. all changes are recorded into operations log
  3. asynchronous replication to secondary
  4. secondaries copy the primary oplog
  5. secondary can use sync source secondary*
  • automatic failover on primary failure

*settings.chainingAllowed (true by default)

Replica set oplog

  • special capped collection that keeps a rolling record of all operations that modify the data stored in the databases
  • idempotent
  • default oplog size (for Unix and Windows systems):

    Storage Engine Default Oplog Size Lower Bound Upper Bound
    In-memory 5% of physical memory 50MB 50GB
    WiredTiger 5% of free disk space 990MB 50GB
    MMAPv1 5% of free disk space 990MB 50GB

Deployment

  • start each server with config options for replSet
    /usr/bin/mongod --replSet "myRepl"
  • initiate the replica set on one node - rs.initiate()
  • verify the configuration - rs.conf()
  • add the rest of the nodes - rs.add() on the primary node
    rs.add("node2:27017") , rs.add("node3:27017")
  • check the status of the replica set - rs.status()

Sharding

Components

  • shard/replica set - subset of the sharded data
  • config servers - metadata and config settings
  • mongos - query router, cluster interface
    sh.addShard("shardName")

Shards

  • contains subset of sharded data
  • replica set for redundancy and HA with odd number of voting members
  • primary shard
  • don’t shard collections if dataset fits into single server
  • –shardsvr in config file (port 27018)
  • every database in a sharded cluster has a primary shard
  • all non-sharded collections will reside on the primary shard
Shard keys (and limitations)
  • shard keys are immutable with max size of 512 bytes (can not be updated/changed)
  • must be ascending indexed key or indexed compound keys that exists in every document in the collection
  • cannot be multikey index, a text index or a geospatial index
  • update operations that affect a single document must include the shard key or the _id field
  • no option for sharding if unique indexes on other fields exist
  • no option for second unique index if the shard key is unique index
  • ranged sharding may not distribute the data evenly
  • hashed sharding distributes the data randomly

Config servers

  • config servers as replica set (only 3.4)
  • stores the metadata for sharded cluster in config database
  • authentication configuration information in admin database
  • holds balancer on Primary node (>= 3.4)
  • –configsvr in config file (port 27019)

mongos

  • caching metadata from config servers
  • routes queries to shards
  • no persistent state
  • updates cache on metadata changes
  • holds balancer (mongodb <= 3.2)
  • mongos version 3.4 cannot connect to earlier mongod versions

Sharding collection

Step                                    Command
Enable sharding on database             sh.enableSharding("users")
Shard collection (shard key must be an indexed key that exists in every document):
  range based                           sh.shardCollection("users.history", { user_id : 1 } )
  hashed based                          sh.shardCollection("users.history", { user_id : "hashed" } )

Troubleshooting

sudo egrep -iR "(fasssert|Fatal Assertion|UnrecoverableRollbackError)" /var/log/mongodb/*.log
db._adminCommand({getParameter:"*"})
output
mongos> db._adminCommand({getParameter:"*"})
{
        "ShardingTaskExecutorPoolHostTimeoutMS" : -1,
        "ShardingTaskExecutorPoolMaxConnecting" : -1,
        "ShardingTaskExecutorPoolMaxSize" : -1,
        "ShardingTaskExecutorPoolMinSize" : 1,
        "ShardingTaskExecutorPoolRefreshRequirementMS" : -1,
        "ShardingTaskExecutorPoolRefreshTimeoutMS" : -1,
        "authSchemaVersion" : 5,
        "authenticationMechanisms" : [
                "MONGODB-CR",
                "MONGODB-X509",
                "SCRAM-SHA-1"
        ],
        "clusterAuthMode" : "undefined",
        "connPoolMaxConnsPerHost" : 200,
        "connPoolMaxShardedConnsPerHost" : 200,
        "cursorTimeoutMillis" : NumberLong(600000),
        "disableNonSSLConnectionLogging" : false,
        "enableCollectionLocking" : true,
        "enableLocalhostAuthBypass" : true,
        "enableTestCommands" : false,
        "heapProfilingEnabled" : false,
        "heapProfilingSampleIntervalBytes" : NumberLong(262144),
        "internalAggregationLookupBatchSize" : 101,
        "internalGeoNearQuery2DMaxCoveringCells" : 16,
        "internalGeoPredicateQuery2DMaxCoveringCells" : 16,
        "internalQueryAlwaysMergeOnPrimaryShard" : false,
        "internalQueryCacheEvictionRatio" : 10,
        "internalQueryCacheFeedbacksStored" : 20,
        "internalQueryCacheSize" : 5000,
        "internalQueryEnumerationMaxIntersectPerAnd" : 3,
        "internalQueryEnumerationMaxOrSolutions" : 10,
        "internalQueryExecMaxBlockingSortBytes" : 33554432,
        "internalQueryExecYieldIterations" : 128,
        "internalQueryExecYieldPeriodMS" : 10,
        "internalQueryForceIntersectionPlans" : false,
        "internalQueryMaxScansToExplode" : 200,
        "internalQueryPlanEvaluationCollFraction" : 0.3,
        "internalQueryPlanEvaluationMaxResults" : 101,
        "internalQueryPlanEvaluationWorks" : 10000,
        "internalQueryPlanOrChildrenIndependently" : true,
        "internalQueryPlannerEnableHashIntersection" : false,
        "internalQueryPlannerEnableIndexIntersection" : true,
        "internalQueryPlannerMaxIndexedSolutions" : 64,
        "internalQueryS2GeoCoarsestLevel" : 0,
        "internalQueryS2GeoFinestLevel" : 23,
        "internalQueryS2GeoMaxCells" : 20,
        "internalSCCAllowFastestAuthConfigReads" : false,
        "internalSCCAllowFastestMetadataConfigReads" : false,
        "logComponentVerbosity" : {
                "verbosity" : 0,
                "accessControl" : {
                        "verbosity" : -1
                },
                "command" : {
                        "verbosity" : -1
                },
                "control" : {
                        "verbosity" : -1
                },
                "executor" : {
                        "verbosity" : -1
                },
                "geo" : {
                        "verbosity" : -1
                },
                "index" : {
                        "verbosity" : -1
                },
                "network" : {
                        "verbosity" : -1,
                        "asio" : {
                                "verbosity" : -1
                        },
                        "bridge" : {
                                "verbosity" : -1
                        }
                },
                "query" : {
                        "verbosity" : -1
                },
                "replication" : {
                        "verbosity" : -1
                },
                "sharding" : {
                        "verbosity" : -1
                },
                "storage" : {
                        "verbosity" : -1,
                        "journal" : {
                                "verbosity" : -1
                        }
                },
                "write" : {
                        "verbosity" : -1
                },
                "ftdc" : {
                        "verbosity" : -1
                }
        },
        "logLevel" : 0,
        "logUserIds" : false,
        "opensslCipherConfig" : "",
        "quiet" : false,
        "replMonitorMaxFailedChecks" : 30,
        "saslHostName" : "ip-172-16-40-75",
        "saslServiceName" : "mongodb",
        "saslauthdPath" : "",
        "scramIterationCount" : 10000,
        "scriptingEngineInterruptIntervalMS" : 1000,
        "sslMode" : "disabled",
        "startupAuthSchemaValidation" : true,
        "taskExecutorPoolSize" : 0,
        "tcmallocAggressiveMemoryDecommit" : 0,
        "tcmallocEnableMarkThreadIdle" : true,
        "tcmallocMaxTotalThreadCacheBytes" : NumberLong(1073741824),
        "textSearchEnabled" : true,
        "timeOutMonitoringReplicaSets" : false,
        "traceExceptions" : false,
        "userCacheInvalidationIntervalSecs" : 30,
        "ok" : 1
}
db._adminCommand( {getCmdLineOpts: 1})
markdown source
mongos> db._adminCommand( {getCmdLineOpts: 1})
{
        "argv" : [
                "/usr/bin/mongos",
                "-f",
                "/etc/mongos.conf",
                "--pidfilepath=/var/run/mongodb/mongos.pid"
        ],
        "parsed" : {
                "config" : "/etc/mongos.conf",
                "net" : {
                        "bindIp" : "0.0.0.0",
                        "port" : 27017
                },
                "processManagement" : {
                        "fork" : true,
                        "pidFilePath" : "/var/run/mongodb/mongos.pid"
                },
                "sharding" : {
                        "configDB" : "172.16.40.136:27019,172.16.40.139:27019,172.16.40.140:27019"
                },
                "systemLog" : {
                        "destination" : "file",
                        "logAppend" : true,
                        "path" : "/var/log/mongodb/mongos.log"
                }
        },
        "ok" : 1
}