Skip to content

Instantly share code, notes, and snippets.

@bdelbosc
Created June 28, 2013 08:52
Show Gist options
  • Save bdelbosc/5883399 to your computer and use it in GitHub Desktop.
Save bdelbosc/5883399 to your computer and use it in GitHub Desktop.
Dummy script to generate Nuxeo SQL dump that can be used for mass import.
#!/usr/bin/python
"""
Dummy generator of a Nuxeo SQL dump that can be used for mass import.

The script prints CREATE TABLE statements and ``COPY ... FROM stdin`` data
sections on standard output.  NB_FILE and NB_FOLDER below set the volume.

Author's timing note: 1m docs gen in 6s
"""
# Alternative (larger) data-set sizes, currently disabled:
#NB_FOLDER=50000
#NB_FILE=1000000
#NB_FOLDER= 1000*1000
#NB_FILE= 100*1000*1000
# Number of File documents to generate.
NB_FILE=1000
# Number of Folder documents to generate.
NB_FOLDER=50
def getParentForDoc(doc_num):
    """Return the folder number that document ``doc_num`` maps to.

    The result is ``doc_num`` modulo the module-level ``NB_FOLDER``.
    """
    folder_index = doc_num % NB_FOLDER
    return folder_index
def getParentForFolder(i):
    """Return the parent folder number of folder ``i``.

    Folder 0 is the root and has no parent (``None``).  Folders below 10
    hang directly under the root.  Any other folder's parent is its own
    number with the last decimal digit removed (e.g. 123 -> 12).
    """
    if i == 0:
        return None
    # Dropping the last decimal digit of an integer >= 10 is an integer
    # division by ten.
    return 0 if i < 10 else i // 10
def genHierarchyFolder(fid):
    """Print the ``hierarchy`` COPY row for folder number ``fid``.

    The row is tab separated.  The parent id column holds the NULL marker
    for the root folder (when ``getParentForFolder`` returns ``None``) and
    the formatted id of the parent folder otherwise.
    """
    parent = getParentForFolder(fid)
    if parent is None:
        # Backslash-N is the NULL marker of the COPY text format.  It is
        # spelled with an escaped backslash because a bare "\N" is a
        # SyntaxError in Python 3 string literals.
        parent_id = "\\N"
    else:
        parent_id = "ffffffff-ffff-ffff-0000-%12.12d" % parent
    print(
        "ffffffff-ffff-ffff-0000-%12.12d\t%s\t\\N\tfolder-%4.4d\tf\tFolder"
        "\t\\N\t\\N\t\\N\t\\N" % (fid, parent_id, fid)
    )
def genHierarchyDoc(fid, did):
    """Print the two ``hierarchy`` COPY rows describing one File document.

    The first row is the document itself (id built from ``did``, parent is
    folder ``fid``, name ``file-<did // 2>``).  The second row is its
    ``content`` child, whose id is built from ``did + 1`` and whose parent
    is the document.

    Args:
        fid: number of the folder holding the document.
        did: even number identifying the document; ``did + 1`` is used for
            the content row.
    """
    # "\\N" (escaped backslash) keeps the COPY NULL marker valid on Python 3,
    # and "//" keeps the file number an integer there as well.
    print(
        "dddddddd-dddd-dddd-0000-%12.12d\tffffffff-ffff-ffff-0000-%12.12d"
        "\t\\N\tfile-%10.10d\tf\tFile\t\\N\t\\N\t1\t0" % (did, fid, did // 2)
    )
    print(
        "cccccccc-cccc-cccc-0000-%12.12d\tdddddddd-dddd-dddd-0000-%12.12d"
        "\t\\N\tcontent\tt\tcontent\t\\N\t\\N\t\\N\t\\N" % (did + 1, did)
    )
def genHierarchy():
    """Print the complete ``COPY hierarchy`` section on standard output.

    For each of the ``NB_FOLDER`` folders the folder row is printed, followed
    by the rows of the files stored in it.  Document ids are the even
    numbers 0, 2, 4, ... in emission order.

    ``NB_FILE`` files are emitted in total.  When ``NB_FILE`` is not a
    multiple of ``NB_FOLDER`` the first ``NB_FILE % NB_FOLDER`` folders get
    one extra file, so the hierarchy covers the same document ids as the
    other generators that loop over ``NB_FILE``.
    """
    # Guard the division: with no folder the loop below does not run at all.
    if NB_FOLDER:
        per_folder, leftover = divmod(NB_FILE, NB_FOLDER)
    else:
        per_folder, leftover = 0, 0
    print("COPY hierarchy FROM stdin;")
    doc_index = 0
    for fid in range(NB_FOLDER):
        genHierarchyFolder(fid)
        for _ in range(per_folder + (1 if fid < leftover else 0)):
            genHierarchyDoc(fid, doc_index * 2)
            doc_index += 1
    # End-of-data marker of the COPY section.
    print("\\.")
def genDublincore():
    """Print the ``COPY dublincore`` section on standard output.

    One row is printed per file document (even ids below ``NB_FILE * 2``)
    and then one per folder (ids below ``NB_FOLDER``).  Each row carries a
    ``description <n>`` and a ``title <n>`` built from the same number as
    the id.
    """
    print("COPY dublincore FROM stdin;")
    # Documents and folders share the row layout; only the id prefix differs.
    # "\\N" is the COPY NULL marker, escaped so the literal is also valid on
    # Python 3.
    row = (
        "%s%12.12d\tAdministrator\t\\N\t2010-06-01 06:23:47.474\t"
        "description %d\t\\N\t\\N\t\\N\t\\N\t2010-06-01 06:23:47.474\t"
        "\\N\t\\N\t\\N\ttitle %d"
    )
    for did in range(0, NB_FILE * 2, 2):
        print(row % ("dddddddd-dddd-dddd-0000-", did, did, did))
    for fid in range(NB_FOLDER):
        print(row % ("ffffffff-ffff-ffff-0000-", fid, fid, fid))
    # End-of-data marker of the COPY section.
    print("\\.")
def genContent():
    """Print the ``COPY content`` section on standard output.

    One row is printed per file document.  Content ids are the odd numbers
    below ``NB_FILE * 2`` and the file name is ``file-<(id - 1) // 2>.odt``.
    Length, data and mime type are the same constants on every row.
    """
    print("COPY content FROM stdin;")
    for cid in range(1, NB_FILE * 2, 2):
        # "//" keeps the file number an integer on Python 3 too, and "\\N"
        # is the COPY NULL marker spelled so the literal is valid there.
        print(
            "cccccccc-cccc-cccc-0000-%12.12d\tfile-%d.odt\t5980"
            "\tfecb537c49dc544e28b425e0b1c3e06b\t\\N\t\\N"
            "\tapplication/vnd.oasis.opendocument.text" % (cid, (cid - 1) // 2)
        )
    # End-of-data marker of the COPY section.
    print("\\.")
def genUid():
    """Print the ``COPY uid`` section on standard output.

    One row with a NULL ``uid`` value is printed for every file document
    (even ids below ``NB_FILE * 2``) and then for every folder (ids below
    ``NB_FOLDER``).
    """
    print("COPY uid FROM stdin;")
    # "\\N" is the COPY NULL marker, escaped so the literal is valid on
    # Python 3 as well.
    for did in range(0, NB_FILE * 2, 2):
        print("dddddddd-dddd-dddd-0000-%12.12d\t\\N" % did)
    for fid in range(NB_FOLDER):
        print("ffffffff-ffff-ffff-0000-%12.12d\t\\N" % fid)
    # End-of-data marker of the COPY section.
    print("\\.")
def genMisc():
    """Print the ``COPY misc`` section on standard output.

    One row is printed per file document (even ids below ``NB_FILE * 2``).
    """
    print("COPY misc FROM stdin;")
    for did in range(0, NB_FILE * 2, 2):
        # The original row read "defautl\Nproject": a misspelling of
        # "default" and a NULL marker where the tab between the two values
        # belongs.
        # NOTE(review): the misc table layout is not declared in this
        # script; confirm the column order against the target schema.
        print(
            "dddddddd-dddd-dddd-0000-%12.12d\tdefault\tproject\tt\t\\N\t\\N" % did
        )
    # End-of-data marker of the COPY section.
    print("\\.")
def genContributor():
    """Print the ``COPY dc_contributors`` section on standard output.

    Every file document (even ids below ``NB_FILE * 2``) and every folder
    (ids below ``NB_FOLDER``) gets a single contributor row: position 0,
    item ``system``.
    """
    print("COPY dc_contributors FROM stdin;")
    for did in range(0, NB_FILE * 2, 2):
        print("dddddddd-dddd-dddd-0000-%12.12d\t0\tsystem" % did)
    for fid in range(NB_FOLDER):
        print("ffffffff-ffff-ffff-0000-%12.12d\t0\tsystem" % fid)
    # End-of-data marker of the COPY section.
    print("\\.")
def genFile():
    """Print the ``COPY file`` section on standard output.

    One row is printed per file document (even ids below ``NB_FILE * 2``),
    with the file name ``file-<id // 2>.odt``.
    """
    print("COPY file FROM stdin;")
    for did in range(0, NB_FILE * 2, 2):
        # "//" keeps the file number an integer on Python 3 as well.
        print("dddddddd-dddd-dddd-0000-%12.12d\tfile-%d.odt" % (did, did // 2))
    # End-of-data marker of the COPY section.
    print("\\.")
def printDDL():
    """Print the CREATE TABLE statements for ``hierarchy`` and ``dublincore``.

    The DDL is written to standard output as one text block followed by the
    newline that ``print`` adds.
    """
    ddl = """CREATE TABLE hierarchy (
id character varying(36) NOT NULL,
parentid character varying(36),
pos integer,
name character varying,
isproperty boolean,
primarytype character varying(250),
ischeckedin boolean,
baseversionid character varying(36),
majorversion bigint,
minorversion bigint
);
CREATE TABLE dublincore (
id character varying(36) NOT NULL,
creator character varying,
source character varying,
created timestamp without time zone,
description character varying,
rights character varying,
"valid" timestamp without time zone,
format character varying,
issued timestamp without time zone,
modified timestamp without time zone,
"language" character varying,
expired timestamp without time zone,
coverage character varying,
title character varying
);
"""
    # Function-call form so the statement works on Python 2 and Python 3.
    print(ddl)
def printDDL2():
    """Print the CREATE TABLE statements for the remaining tables.

    Covers ``content``, ``uid``, ``dc_contributors`` and ``file``.  The DDL
    is written to standard output as one text block followed by the newline
    that ``print`` adds.
    """
    ddl = """CREATE TABLE content (
id character varying(36) NOT NULL,
name character varying,
length bigint,
data character varying(40),
"encoding" character varying,
digest character varying,
"mime-type" character varying
);
CREATE TABLE uid (
id character varying(36) NOT NULL,
uid character varying
);
CREATE TABLE dc_contributors (
id character varying(36) NOT NULL,
pos integer,
item character varying
);
CREATE TABLE file (
id character varying(36) NOT NULL,
filename character varying
);
"""
    # Function-call form so the statement works on Python 2 and Python 3.
    print(ddl)
# Emit the dump on standard output: each DDL batch is printed before the
# COPY sections that load the tables it declares.
printDDL()
genHierarchy()
genDublincore()
printDDL2()
genContent()
genUid()
genContributor()
genFile()
# NOTE(review): genMisc() is defined above but never called here, and no
# CREATE TABLE for "misc" is printed; confirm this is intentional.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment