Skip to content

Instantly share code, notes, and snippets.

@prashantdce19
Last active August 29, 2015 14:04
Show Gist options
  • Save prashantdce19/8e456bd4cfefa1dc8d11 to your computer and use it in GitHub Desktop.
Save prashantdce19/8e456bd4cfefa1dc8d11 to your computer and use it in GitHub Desktop.
Copy the convflowdev database to convflowdev_trial.
import pymongo
import datetime
from bson.objectid import ObjectId
import random
import re
import json
import urllib
# Connect to the MongoDB server on localhost and select the trial database.
# MongoClient is reached through the `pymongo` module because the file only
# does `import pymongo`; the bare name `MongoClient` raised NameError.
client = pymongo.MongoClient('localhost', 27017)
db = client['convflowdev_trial']
problems = db['problems']
users = db['users']

# WARNING: destructive. Both collections are emptied as soon as this module
# is executed or imported, before any data is generated.
users.remove({})
problems.remove({})
# Download the question fixture and keep only the text of each entry.
# NOTE(review): a www.dropbox.com/s/... share link may serve an HTML preview
# page instead of the raw file -- confirm this URL really returns JSON.
response = urllib.urlopen("https://www.dropbox.com/s/xmtdj443ezjxumh/1.json")
try:
    openjsonfile = json.load(response)
finally:
    # Release the connection even when the payload is not valid JSON.
    response.close()

# Each item of 'collection2' carries its question text under
# ['property2']['text'].
questions = [entry['property2']['text'] for entry in openjsonfile['collection2']]
# Candidate values for each component of a randomly generated timestamp.
# range() excludes its upper bound, so the bounds below are one past the
# last value that should be produced.
day = range(1, 29)      # 1..28: the largest day that is valid in every month
month = range(1, 13)    # 1..12: December was previously unreachable
year = 2014
hour = range(24)        # 0..23
minutes = range(60)     # 0..59
seconds = range(60)     # 0..59
# Common English words that are stripped from question titles before the
# remaining words are used as keywords.
stopwords = [
    'a', 'able', 'about', 'across', 'after', 'all', 'almost', 'also', 'am', 'among',
    'an', 'and', 'any', 'are', 'as', 'at', 'be', 'because', 'been', 'but',
    'by', 'can', 'cannot', 'could', 'dear', 'did', 'do', 'does', 'either', 'else',
    'ever', 'every', 'for', 'from', 'get', 'got', 'had', 'has', 'have', 'he',
    'her', 'hers', 'him', 'his', 'how', 'however', 'i', 'if', 'in', 'into',
    'is', 'it', 'its', 'just', 'least', 'let', 'like', 'likely', 'may', 'me',
    'might', 'most', 'must', 'my', 'neither', 'no', 'nor', 'not', 'of', 'off',
    'often', 'on', 'only', 'or', 'other', 'our', 'own', 'rather', 'said', 'say',
    'says', 'she', 'should', 'since', 'so', 'some', 'than', 'that', 'the', 'their',
    'them', 'then', 'there', 'these', 'they', 'this', 'tis', 'to', 'too', 'twas',
    'us', 'wants', 'was', 'we', 'were', 'what', 'when', 'where', 'which', 'while',
    'who', 'whom', 'why', 'will', 'with', 'would', 'yet', 'you', 'your',
]

# Regex alternation ("a|able|about|...") built from the stop words; it is
# embedded between word boundaries by remove_stopwords().
remove = '|'.join(stopwords)
# Pool of display names for generated users (100 entries).
# NOTE(review): 'yrna Lively' looks like a truncated 'Myrna Lively'; it is
# kept exactly as found -- confirm before correcting the data.
names = [
    'Dorris Silverman', 'Antonina Britt', 'Roseline Griggs', 'Hollie Seaman', 'Whitney Byrne',
    'Giselle Skelton', 'Velda Mccormack', 'Antonia Ivory', 'Rosita Akins', 'Mickey Montague',
    'Corina Thigpen', 'Charlena Irving', 'Yuri Sager', 'Daphine Dewitt', 'Berry Sandlin',
    'Moshe Henson', 'Tena Westfall', 'Enriqueta Bethel', 'Bruna Winston', 'Kallie Benedict',
    'Louis Bloom', 'Phoebe Hood', 'Reena Naquin', 'Beau Alfaro', 'Johnie Shelby',
    'Merlin Lutz', 'Jalisa Royer', 'Chi Lafleur', 'Roni Luckett', 'Nery Darling',
    'Jone Poland', 'Suk Carranza', 'Rolf Chacon', 'Eura Peter', 'Rubie Rains',
    'Romelia Marble', 'Queenie Knudsen', 'Andria Mccain', 'Oma Swift', 'Vonda Autry',
    'India Mccracken', 'Stevie Pridgen', 'Kellee Lopes', 'Santina Fife', 'Tanja Jeter',
    'Dallas Willey', 'Elma Damon', 'Jeniffer Faust', 'Lashawna Muniz', 'August Binkley',
    'yrna Lively', 'Anglea Spicer', 'Gilda Whitehead', 'Karri Andre', 'Grover Verdin',
    'Brittney Swenson', 'Junie Healy', 'Aja Light', 'Justina Baumgartner', 'Aubrey Buffington',
    'Vincenza Emmons', 'Isela Vogel', 'Catarina Ferrer', 'Marty Staton', 'Kiesha Woodson',
    'Denita Kohler', 'Nannette Searcy', 'Dahlia Kohn', 'Cole Montalvo', 'Karry Gale',
    'Celine Mcmillen', 'Terisa Cato', 'Odilia Mackey', 'Remona Holm', 'Providencia Chapin',
    'Yuk Elmore', 'Emile Peel', 'Cristie Millard', 'Jenni Main', 'Randell Grubbs',
    'Suzan Inman', 'Angelita Denson', 'Tuan Mckeever', 'Mitchell Humphreys', 'Jerry Fuentes',
    'Virgie Callaway', 'Liberty Epperson', 'Reuben Mckenna', 'Marg Sumpter', 'Beryl Devries',
    'Trina Orlando', 'Tamela Heckman', 'Denis Anaya', 'Nisha Eubanks', 'Antionette Shackelford',
    'Norah Iverson', 'Melania Willoughby', 'Hosea Contreras', 'Carmela Bankston', 'Glennie Mcmurray',
]
# Pool of e-mail addresses used as usernames for generated users
# (100 entries). The address "erat.nonummy.ultricies@diam.ca" used to be
# split by a line break inside its string literal; it is now a single,
# newline-free string.
emails = [
    "molestie.orci@idantedictum.ca", "eros@sedsapien.ca",
    "sed.est.Nunc@mollis.edu", "mauris.aliquam@vestibulumMauris.com",
    "id@quamPellentesque.ca", "est.Nunc.laoreet@cursuset.ca",
    "ullamcorper.viverra.Maecenas@nonsapien.com", "consectetuer.ipsum@Fuscediamnunc.net",
    "dictum.sapien@vitaealiquetnec.org", "Donec@senectusetnetus.com",
    "enim@ipsumSuspendisse.co.uk", "auctor@at.co.uk",
    "parturient.montes.nascetur@tempordiam.co.uk", "purus.sapien.gravida@magnaaneque.com",
    "vitae.erat.Vivamus@laciniaSed.net", "amet.consectetuer.adipiscing@risus.co.uk",
    "massa.Mauris.vestibulum@per.co.uk", "vel@estMauris.co.uk",
    "Fusce.diam@ipsumnuncid.ca", "et@CraspellentesqueSed.net",
    "neque@semegetmassa.edu", "Aliquam.ultrices.iaculis@nostra.ca",
    "elit.pharetra.ut@sitametorci.net", "interdum.enim@musDonecdignissim.edu",
    "aliquet.diam@vestibulum.com", "scelerisque@libero.co.uk",
    "primis@ultriciesdignissim.ca", "eros.turpis.non@placeratCrasdictum.net",
    "id@etarcuimperdiet.org", "Donec@pedeCum.co.uk",
    "faucibus.orci@Phasellusvitaemauris.co.uk", "Nunc@condimentum.net",
    "Aenean@cursusInteger.co.uk", "ac.feugiat.non@euodioPhasellus.co.uk",
    "Phasellus.nulla.Integer@auctorvelit.edu", "tellus.imperdiet.non@ante.org",
    "In.mi.pede@disparturient.ca", "Nulla.interdum@molestiedapibus.ca",
    "nec.urna@lorem.co.uk", "lectus.convallis.est@dolorsit.ca",
    "ut.odio@cubilia.com", "cursus.in@diamnunc.com",
    "nisi@nullamagna.net", "ultrices.sit.amet@Phasellusdapibus.org",
    "sit.amet@aliquetlibero.ca", "aliquet@sociisnatoque.co.uk",
    "risus@augueSed.net", "Integer.id@dictumPhasellusin.edu",
    "ullamcorper@milaciniamattis.co.uk", "metus.eu.erat@acsem.ca",
    "sit.amet.orci@feugiatnec.net", "egestas@sitametornare.org",
    "montes.nascetur.ridiculus@semsemper.org", "lorem.Donec@Mauris.co.uk",
    "Integer.urna.Vivamus@duiCras.co.uk", "eu.euismod.ac@gravidanonsollicitudin.net",
    "Pellentesque.habitant.morbi@quisdiam.co.uk", "ac.mattis@Integeraliquamadipiscing.ca",
    "risus.varius.orci@Nunc.ca", "Nulla.eu.neque@consectetuer.edu",
    "arcu.Vestibulum@nunc.co.uk", "erat.nonummy.ultricies@diam.ca",
    "faucibus.id.libero@ipsumSuspendisse.org", "vitae.purus.gravida@Phasellusnulla.com",
    "semper.pretium@estarcuac.org", "eget.odio@nec.com",
    "metus@habitantmorbi.edu", "hymenaeos@sitametornare.com",
    "posuere.vulputate@arcu.net", "eget@ligulaDonecluctus.org",
    "Fusce.mollis@libero.com", "erat@erat.edu",
    "facilisis@ametnullaDonec.com", "dolor.nonummy.ac@tinciduntcongue.com",
    "lectus@enim.ca", "non@nequesedsem.ca",
    "vel.lectus.Cum@ullamcorper.com", "massa.lobortis.ultrices@tempus.org",
    "et@antelectusconvallis.ca", "erat.Etiam.vestibulum@velitduisemper.com",
    "metus.In.nec@maurisaliquam.com", "purus.in.molestie@semmagna.org",
    "sapien.molestie@pellentesquemassalobortis.edu", "lobortis.quis@volutpatnuncsit.com",
    "malesuada.id@aliquameu.edu", "sagittis.semper.Nam@iaculisaliquetdiam.edu",
    "magna.Lorem@Pellentesquehabitant.net", "erat@aliquetmagnaa.com",
    "vulputate.nisi@ipsum.edu", "malesuada.vel.venenatis@sedpedenec.edu",
    "semper.auctor.Mauris@vitaesemper.net", "Mauris.quis.turpis@lorem.com",
    "lacus@senectus.edu", "Curabitur.sed@Aliquamadipiscing.ca",
    "nec@Praesent.edu", "dolor@nec.org",
    "In@pede.org", "Duis.dignissim.tempor@Crasdictum.edu",
    "montes.nascetur.ridiculus@lorem.edu", "faucibus.ut.nulla@pedeCum.edu",
]
# One ObjectId created at module load; create_users() stores this same value
# in the 'organization' field of every user document it builds.
organization_id = ObjectId()
def get_voters(ids):
    """Return a random selection of distinct entries from *ids*.

    The target size is drawn uniformly from 15..29 inclusive. When *ids*
    holds fewer entries than the drawn size, every entry is returned (in
    random order) instead of raising ``ValueError``.

    Args:
        ids: Sequence of candidate ids to sample from.

    Returns:
        A new list of unique elements taken from *ids*.
    """
    wanted = random.randint(15, 29)
    # random.sample() refuses a sample larger than its population, so clamp.
    return random.sample(ids, min(wanted, len(ids)))
def timeCreated():
    """Return a random naive ``datetime`` within the module-level ``year``.

    Each component is picked independently from the module-level pools
    ``month``, ``day``, ``hour``, ``minutes`` and ``seconds``; the
    microsecond field is always 0.
    """
    chosen_month = random.choice(month)
    chosen_day = random.choice(day)
    chosen_hour = random.choice(hour)
    chosen_minute = random.choice(minutes)
    chosen_second = random.choice(seconds)
    return datetime.datetime(
        year,
        chosen_month,
        chosen_day,
        chosen_hour,
        chosen_minute,
        chosen_second,
        0,
    )
def remove_number(txt):
    """Replace standalone numbers in *txt* with a single space.

    A number is standalone when it is delimited by whitespace or by the
    start/end of the string; digits glued to other characters (``"abc123"``)
    are left alone. A run of whitespace-separated numbers is collapsed in
    one go, so ``"a 1 2 b"`` becomes ``"a b"`` -- previously the second
    number survived because the whitespace it shared with the first had
    already been consumed. A string consisting only of numbers becomes
    ``" "``.

    Args:
        txt: Text to clean.

    Returns:
        The text with each standalone number (or run of them), together
        with its delimiting whitespace, replaced by one space.
    """
    # Raw string: \d and \s are regex escapes, not string escapes.
    return re.sub(r"(?:^|\s)\d+(?:\s\d+)*(?:\s|$)", " ", txt)
def remove_stopwords(txt):
    """Strip punctuation and stop words from *txt*.

    Every non-word character is first replaced by a space; then each
    whole-word, case-insensitive match of the module-level ``remove``
    alternation is deleted. The surrounding spaces are left in place.

    Args:
        txt: Text to clean.

    Returns:
        The cleaned text.
    """
    stopword_pattern = r'\b(' + remove + r')\b'
    depunctuated = re.sub(r'[^\w]', ' ', txt)
    return re.sub(stopword_pattern, "", depunctuated, flags=re.IGNORECASE)
def remove_alphanumeric(txt):
    """Replace every non-word character in *txt* with a space.

    Despite the name, letters, digits and underscores are the characters
    that are kept; everything else (punctuation, whitespace, symbols) is
    replaced one-for-one by a space.

    Args:
        txt: Text to clean.

    Returns:
        A string of the same length as *txt*.
    """
    non_word = re.compile(r'[^\w]')
    return non_word.sub(' ', txt)
def keywords(txt):
    """Return the list of keyword tokens extracted from *txt*.

    The cleaning steps are chained, each working on the previous step's
    output: punctuation is replaced by spaces, standalone numbers are
    removed, stop words are removed, and the remainder is split on
    whitespace. Previously every step ran on the raw text and only the
    stop-word result was used, so numbers were never removed.

    Args:
        txt: Question title (or any text) to extract keywords from.

    Returns:
        List of remaining words, in their original order and casing.
    """
    cleaned = remove_alphanumeric(txt)
    cleaned = remove_number(cleaned)
    cleaned = remove_stopwords(cleaned)
    return cleaned.split()
def create_users(count=99):
    """Insert *count* generated user documents into the ``users`` collection.

    Each user gets a distinct e-mail address (stored as ``username``) and a
    distinct display name, a random creation time, and the shared
    ``organization_id``. The module-level ``emails`` and ``names`` lists
    are only read, never consumed, so the function can be called again.

    Args:
        count: Number of users to create. Defaults to 99, the number the
            script has always generated.

    Raises:
        ValueError: If *count* exceeds the number of available e-mail
            addresses or names (raised by ``random.sample``).
    """
    # Sampling without replacement keeps addresses and names unique without
    # removing entries from the shared module-level lists.
    chosen_emails = random.sample(emails, count)
    chosen_names = random.sample(names, count)

    users_added = [
        {
            u'username': email_id,
            u'timeCreated': timeCreated(),
            u'displayname': name,
            u'organization': organization_id,
        }
        for email_id, name in zip(chosen_emails, chosen_names)
    ]

    # Skip the bulk insert for an empty batch rather than sending one.
    if users_added:
        users.insert(users_added)
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print('Users Created')
def create_problems(count=100):
    """Insert generated problem documents into the ``problems`` collection.

    Up to *count* distinct questions are drawn from the module-level
    ``questions`` list. Each becomes a problem with a random creator taken
    from the existing users, a random set of voters, a random creation
    time, the question's keywords, and an ACL whose ``users`` list is the
    union of the voters and a second random draw of users (the creator is
    excluded from both draws). ``questions`` is only read, never consumed.

    Args:
        count: Maximum number of problems to create. Defaults to 100, the
            number the script has always requested. When fewer questions
            are available, one problem is created per available question.

    Raises:
        IndexError: If the ``users`` collection is empty while at least one
            problem is to be created (no creator can be chosen).
    """
    ids = [doc['_id'] for doc in users.find({}, {'_id': 1})]

    # Draw distinct questions up front; clamping avoids failing when the
    # fixture holds fewer questions than requested.
    chosen_questions = random.sample(questions, min(count, len(questions)))

    problems_added = []
    for question in chosen_questions:
        creator = random.choice(ids)
        voters = [uid for uid in get_voters(ids) if uid != creator]
        commenters = [uid for uid in get_voters(ids) if uid != creator]
        # De-duplicate users that appear in both draws.
        commented_users = list(set(commenters + voters))
        problems_added.append({
            u'voters': voters,
            u'creationtime': timeCreated(),
            u'title': question,
            u'keywords': keywords(question),
            u'creator': creator,
            u'acl': {
                u'mode': random.choice([u'public', u'private']),
                u'users': commented_users,
                u'groups': [],
            },
        })

    # Skip the bulk insert for an empty batch rather than sending one.
    if problems_added:
        problems.insert(problems_added)
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print('Problems Created')
def generate_data():
    """Populate the database: users first, then problems.

    Users must exist before problems are built because create_problems()
    reads the user ids back from the ``users`` collection.
    """
    for step in (create_users, create_problems):
        step()
    print('Data Generated')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment