Skip to content

Instantly share code, notes, and snippets.

@flaschbier
Last active February 7, 2016 15:41
Show Gist options
  • Save flaschbier/a3e9419428fc0b47006a to your computer and use it in GitHub Desktop.
Save flaschbier/a3e9419428fc0b47006a to your computer and use it in GitHub Desktop.
Experimenting with regex queries on multi-valued attributes in MongoDB. Works fine with Python 2.7.6 and PyMongo 3.2.1.
#!/usr/bin/env python
# encoding: utf-8
"""
http://stackoverflow.com/questions/35248610
how to “find” docs in mongodb (in python) where a substring exists in a field
which is a list of strings? [no duplicate]
"""
import json
import sys
import unittest
import pymongo
from pymongo import MongoClient
class TestMongoMultiValueQuery(unittest.TestCase):
    """
    Query a multi-valued attribute (a list of strings) by regex.

    All tests share one fixture collection, ``q35248610`` in database
    ``test``, reached through a ``MongoClient()`` created without arguments.
    The collection is dropped and repopulated once before the tests run and
    dropped again afterwards.
    """

    @classmethod
    def setUpClass(cls):
        """Connect and populate collection q35248610 in database test."""
        cls.client = MongoClient()
        cls.db = cls.client.test
        # The collection is named after the question id to keep it unique.
        # Drop whatever is stored under that name so the fixture starts empty.
        cls.db.q35248610.drop()
        cls.coll = cls.db.q35248610
        docs = [
            {"key": "t1", "mvattr": ["one", "two"]},
            {"key": "t2", "mvattr": ["not one", "but two"]},
            {"key": "t3", "mvattr": ["two", "one"]},
            {"key": "t4", "mvattr": ["not two", "but one"]},
            {"key": "t5", "mvattr": ["i am alfa", "beta here too"]},
        ]
        cls.coll.insert_many(docs)

    def assertResultIs(self, query, l):
        """
        Assert that ``query`` selects exactly the documents keyed by ``l``.

        We do not move the common '{"mvattr" : {"$regex" : _ }}' here because
        this would actually make tests more difficult to understand.

        :param query: filter document handed to ``find``
        :param l: expected ``key`` values; their order does not matter
        :raises AssertionError: if the selected keys differ from ``l``
        """
        # Only the key is read from each hit, so project to that field.
        cur = self.coll.find(query, {"key": 1})
        hits = [doc['key'] for doc in cur]
        # One order-insensitive comparison instead of a length check plus
        # membership checks: it also catches duplicates on either side and
        # reports both lists when it fails.
        self.assertEqual(sorted(hits), sorted(l))

    def test_contains_one(self):
        self.assertResultIs(
            # all records where at least one of the values contains "one"
            {"mvattr": {"$regex": "one"}},
            ["t1", "t2", "t3", "t4"])

    def test_starts_with_one(self):
        self.assertResultIs(
            # all records where one of the values starts with "one"
            {"mvattr": {"$regex": "^one"}},
            ["t1", "t3"])

    def test_ends_with_one(self):
        self.assertResultIs(
            # all records where one of the values ends with "one"
            {"mvattr": {"$regex": "one$"}},
            ["t1", "t2", "t3", "t4"])

    def test_contains_space(self):
        self.assertResultIs(
            # a raw string keeps the syntax simple when using backslashes
            {"mvattr": {"$regex": r'\s'}},
            ["t2", "t4", "t5"])

    def test_one_or_two(self):
        self.assertResultIs(
            # $in with plain strings selects on full values of the
            # multi-valued attribute: "not one" / "but two" do not match
            {"mvattr": {"$in": ['one', 'two']}},
            ["t1", "t3"])

    def test_contains_not_or_but(self):
        self.assertResultIs(
            # | in a regex acts a little bit like $in combined with SQL LIKE
            {"mvattr": {"$regex": r'not|but'}},
            ["t2", "t4"])

    def test_ops_question_1(self):
        self.assertResultIs(
            # the original question, however, was regarding AND;
            # in t5 the two patterns are found in different list values
            {"$and": [
                {"mvattr": {"$regex": r'alfa'}},
                {"mvattr": {"$regex": r'beta'}}
            ]},
            ["t5"])

    def test_ops_question_2(self):
        self.assertResultIs(
            # is it really AND? no document contains both "alfa" and "one"
            {"$and": [
                {"mvattr": {"$regex": r'alfa'}},
                {"mvattr": {"$regex": r'one'}}
            ]},
            [])

    @classmethod
    def tearDownClass(cls):
        """Tidy up: drop the fixture collection and close the client."""
        cls.coll.drop()
        cls.client.close()
if __name__ == '__main__':
    # Run the tests when executed as a script; verbosity=2 is used
    # to see which tests are processed.
    unittest.main(verbosity=2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment