Skip to content

Instantly share code, notes, and snippets.

@copyleftdev
Created February 26, 2016 15:57
Show Gist options
  • Save copyleftdev/3d3c2f4d4ac705a9bec4 to your computer and use it in GitHub Desktop.
Save copyleftdev/3d3c2f4d4ac705a9bec4 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import random
import os
from faker import Factory
import datetime
# Module-level Faker generator; seedDocs() uses it to produce the fake
# credit-card numbers and SSNs that get written into documents.
fake = Factory.create()
def getDocList(rootFoldr):
    """Return the paths of all files found under ``rootFoldr``, recursively.

    Args:
        rootFoldr: Directory to walk with ``os.walk``.

    Returns:
        A list of file paths, each formed by joining the directory path
        reported by ``os.walk`` with the file name. The list is empty when
        the walk yields no files (``os.walk`` ignores listing errors by
        default, so a missing root also produces an empty list).
    """
    filelst = []
    for dirpath, _, files in os.walk(rootFoldr):
        # Collect this directory's files in one call instead of one
        # append per file.
        filelst.extend(os.path.join(dirpath, filename) for filename in files)
    return filelst
def seedDocs(docCount, rootFoldr):
    """Append a fake PII marker to randomly chosen files under ``rootFoldr``.

    Picks ``docCount`` distinct files from ``getDocList(rootFoldr)``, appends
    the text `` PII VECTOR ( <value> )`` to each one, and appends the path of
    every seeded file (one per line) to ``piiseedmap.txt`` in the current
    working directory. Prints a one-line summary with the elapsed time when
    done.

    Args:
        docCount: Number of files to seed.
        rootFoldr: Directory whose files (recursively) are candidates.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``docCount`` is
            negative or larger than the number of files found.
    """
    # Keep the generator methods themselves, not the result of calling them
    # once: otherwise every document would receive one of the same two
    # values for the whole run.
    piiProviders = [fake.credit_card_number, fake.ssn]
    docslst = random.sample(getDocList(rootFoldr), docCount)
    startime = datetime.datetime.now()
    # Open the seed map once for the whole run rather than once per document.
    with open('piiseedmap.txt', 'a') as logfn:
        for each_doc in docslst:
            with open(each_doc, 'a') as fn:
                fn.write(' PII VECTOR ( {} )'.format(random.choice(piiProviders)()))
            logfn.write(each_doc + '\n')
    elapsedtime = datetime.datetime.now() - startime
    # timedelta.microseconds is only the sub-second component (0..999999);
    # total_seconds() covers the whole duration, including runs over 1 s.
    elapsedMicros = int(elapsedtime.total_seconds() * 1000000)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Pii Seeding {} documents complete. elapsed time: {} microseconds".format(docCount, elapsedMicros))
# Script entry: seed 20000 randomly chosen files under the 'sample' directory.
seedDocs(docCount=20000, rootFoldr='sample')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment