Created
June 28, 2013 08:52
-
-
Save bdelbosc/5883399 to your computer and use it in GitHub Desktop.
Dummy script to generate a Nuxeo SQL dump that can be used for mass import.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
"""
Dummy script that prints a Nuxeo SQL dump usable for mass import.

The dump is written to standard output as CREATE TABLE statements
followed by COPY ... FROM stdin data blocks.

Author's timing note: 1m docs gen in 6s
"""
# Alternative sizings left by the author, kept for reference.
#NB_FOLDER=50000
#NB_FILE=1000000
#NB_FOLDER= 1000*1000
#NB_FILE= 100*1000*1000
# Number of File documents the gen* functions emit rows for.
NB_FILE=1000
# Number of Folder documents the gen* functions emit rows for.
NB_FOLDER=50
def getParentForDoc(doc_num, nb_folder=None):
    """Return doc_num modulo the folder count.

    doc_num   -- integer document number.
    nb_folder -- folder count to use; when omitted, the module-level
                 NB_FOLDER is used.

    This helper is not called anywhere in the visible script.
    """
    if nb_folder is None:
        # Reading a module-level name needs no `global` declaration.
        nb_folder = NB_FOLDER
    return doc_num % nb_folder
def getParentForFolder(i):
    """Return the parent folder number of folder i, or None for the root.

    Folder 0 is the root and has no parent. Every other folder below 10
    hangs off folder 0. From 10 upwards the parent is the number with its
    last decimal digit removed (so 10..19 -> 1, 123 -> 12).
    """
    if i == 0:
        return None
    if i < 10:
        return 0
    # Dropping the last decimal digit is integer division by ten; no need
    # to round-trip through str().
    return i // 10
def genHierarchyFolder(fid):
    """Print the hierarchy COPY row for folder number fid.

    The row id is the fixed 'ffffffff-ffff-ffff-0000-' prefix followed by
    fid zero-padded to 12 digits. The parent is looked up with
    getParentForFolder(); when that returns None the parent column holds
    the NULL marker instead of an id.
    """
    parent = getParentForFolder(fid)
    if parent is None:
        # Backslash-N is the NULL marker in the COPY data. The backslash is
        # escaped because a bare \N is a syntax error in Python 3 strings.
        parent_id = '\\N'
    else:
        parent_id = 'ffffffff-ffff-ffff-0000-%12.12d' % parent
    # Columns: id, parentid, pos, name, isproperty, primarytype,
    # ischeckedin, baseversionid, majorversion, minorversion.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("ffffffff-ffff-ffff-0000-%12.12d\t%s\t\\N\tfolder-%4.4d\tf\tFolder\t\\N\t\\N\t\\N\t\\N"
          % (fid, parent_id, fid))
def genHierarchyDoc(fid, did):
    """Print the two hierarchy COPY rows for one File document.

    fid -- number of the parent folder.
    did -- document number; the caller passes even numbers, and did + 1
           is used for the document's content child row.

    The first row is the File itself (named 'file-' plus did // 2 padded
    to 10 digits, version 1.0). The second row is its 'content' property
    node, whose parent is the File row.
    """
    # `//` keeps integer division on Python 3 as well as Python 2, and the
    # escaped backslash-N (NULL marker) is valid on both.
    print("dddddddd-dddd-dddd-0000-%12.12d\tffffffff-ffff-ffff-0000-%12.12d\t\\N\tfile-%10.10d\tf\tFile\t\\N\t\\N\t1\t0"
          % (did, fid, did // 2))
    print("cccccccc-cccc-cccc-0000-%12.12d\tdddddddd-dddd-dddd-0000-%12.12d\t\\N\tcontent\tt\tcontent\t\\N\t\\N\t\\N\t\\N"
          % (did + 1, did))
def genHierarchy(nb_file=None, nb_folder=None):
    """Print the COPY block for the hierarchy table.

    For each folder, prints the folder row followed by the rows of the
    documents placed in it. Documents are numbered 0, 2, 4, ... in output
    order.

    nb_file   -- number of documents; defaults to the module-level NB_FILE.
    nb_folder -- number of folders; defaults to the module-level NB_FOLDER.

    When nb_file is not a multiple of nb_folder, the leftover documents
    are given one each to the first folders. Previously they were dropped
    here while the other gen* functions still emitted rows for them.
    """
    if nb_file is None:
        nb_file = NB_FILE
    if nb_folder is None:
        nb_folder = NB_FOLDER
    print("COPY hierarchy FROM stdin;")
    if nb_folder:
        per_folder, leftover = divmod(nb_file, nb_folder)
    else:
        # No folders: the loop below does not run, so avoid dividing by 0.
        per_folder = leftover = 0
    doc_index = 0
    # range() (not xrange) so the function also runs on Python 3.
    for fid in range(nb_folder):
        genHierarchyFolder(fid)
        doc_count = per_folder + (1 if fid < leftover else 0)
        for _ in range(doc_count):
            genHierarchyDoc(fid, doc_index * 2)
            doc_index += 1
    # Backslash-dot terminates the COPY data.
    print("\\.")
def genDublincore(nb_file=None, nb_folder=None):
    """Print the COPY block for the dublincore table.

    Emits one row per document (even numbers 0, 2, ..., ids prefixed
    'dddddddd-dddd-dddd') and then one row per folder (ids prefixed
    'ffffffff-ffff-ffff'). Each row sets creator, created, description,
    modified and title; the remaining columns are NULL.

    nb_file   -- number of documents; defaults to the module-level NB_FILE.
    nb_folder -- number of folders; defaults to the module-level NB_FOLDER.
    """
    if nb_file is None:
        nb_file = NB_FILE
    if nb_folder is None:
        nb_folder = NB_FOLDER
    # One template for both kinds of row; only the id prefix differs.
    # The escaped backslash-N is the NULL marker and is valid on Python 3.
    row = ("%s-0000-%12.12d\tAdministrator\t\\N\t2010-06-01 06:23:47.474\t%s"
           "\t\\N\t\\N\t\\N\t\\N\t2010-06-01 06:23:47.474\t\\N\t\\N\t\\N\t%s")
    print("COPY dublincore FROM stdin;")
    # range() (not xrange) so the function also runs on Python 3.
    for did in range(0, nb_file * 2, 2):
        print(row % ("dddddddd-dddd-dddd", did,
                     "description %d" % did, "title %d" % did))
    for fid in range(nb_folder):
        print(row % ("ffffffff-ffff-ffff", fid,
                     "description %d" % fid, "title %d" % fid))
    # Backslash-dot terminates the COPY data.
    print("\\.")
def genContent(nb_file=None):
    """Print the COPY block for the content table.

    Emits one row per document, keyed by the odd numbers 1, 3, 5, ...
    (ids prefixed 'cccccccc-cccc-cccc'). Every row carries the same
    length, data hash and mime type; only the id and the file name
    ('file-<k>.odt', k counting from 0) change.

    nb_file -- number of documents; defaults to the module-level NB_FILE.
    """
    if nb_file is None:
        nb_file = NB_FILE
    print("COPY content FROM stdin;")
    # range() and `//` keep this working on Python 3 as well as Python 2;
    # the escaped backslash-N is the NULL marker.
    for cid in range(1, nb_file * 2, 2):
        print("cccccccc-cccc-cccc-0000-%12.12d\tfile-%d.odt\t5980\tfecb537c49dc544e28b425e0b1c3e06b\t\\N\t\\N\tapplication/vnd.oasis.opendocument.text"
              % (cid, (cid - 1) // 2))
    # Backslash-dot terminates the COPY data.
    print("\\.")
def genUid(nb_file=None, nb_folder=None):
    """Print the COPY block for the uid table.

    Emits one row per document (even numbers 0, 2, ...) and then one row
    per folder, each with a NULL uid column.

    nb_file   -- number of documents; defaults to the module-level NB_FILE.
    nb_folder -- number of folders; defaults to the module-level NB_FOLDER.
    """
    if nb_file is None:
        nb_file = NB_FILE
    if nb_folder is None:
        nb_folder = NB_FOLDER
    print("COPY uid FROM stdin;")
    # range() (not xrange) so the function also runs on Python 3; the
    # escaped backslash-N is the NULL marker.
    for did in range(0, nb_file * 2, 2):
        print("dddddddd-dddd-dddd-0000-%12.12d\t\\N" % did)
    for fid in range(nb_folder):
        print("ffffffff-ffff-ffff-0000-%12.12d\t\\N" % fid)
    # Backslash-dot terminates the COPY data.
    print("\\.")
def genMisc(nb_file=None):
    """Print the COPY block for the misc table, one row per document.

    nb_file -- number of documents; defaults to the module-level NB_FILE.

    This function is not called by the visible script, and the visible
    DDL does not create a misc table.
    """
    if nb_file is None:
        nb_file = NB_FILE
    print("COPY misc FROM stdin;")
    # NOTE(review): 'defautl' followed by backslash-N and 'project' looks
    # like a typo, presumably for 'default', a tab, then 'project'. The
    # emitted bytes are kept unchanged here; confirm against the real misc
    # table before using this function.
    # range() (not xrange) so the function also runs on Python 3.
    for did in range(0, nb_file * 2, 2):
        print("dddddddd-dddd-dddd-0000-%12.12d\tdefautl\\Nproject\tt\t\\N\t\\N" % did)
    # Backslash-dot terminates the COPY data.
    print("\\.")
def genContributor(nb_file=None, nb_folder=None):
    """Print the COPY block for the dc_contributors table.

    Emits one row per document (even numbers 0, 2, ...) and then one row
    per folder, each with pos 0 and item 'system'.

    nb_file   -- number of documents; defaults to the module-level NB_FILE.
    nb_folder -- number of folders; defaults to the module-level NB_FOLDER.
    """
    if nb_file is None:
        nb_file = NB_FILE
    if nb_folder is None:
        nb_folder = NB_FOLDER
    print("COPY dc_contributors FROM stdin;")
    # range() (not xrange) so the function also runs on Python 3.
    for did in range(0, nb_file * 2, 2):
        print("dddddddd-dddd-dddd-0000-%12.12d\t0\tsystem" % did)
    for fid in range(nb_folder):
        print("ffffffff-ffff-ffff-0000-%12.12d\t0\tsystem" % fid)
    # Backslash-dot terminates the COPY data.
    print("\\.")
def genFile(nb_file=None):
    """Print the COPY block for the file table.

    Emits one row per document (even numbers 0, 2, ...) with the file
    name 'file-<k>.odt', k counting from 0.

    nb_file -- number of documents; defaults to the module-level NB_FILE.
    """
    if nb_file is None:
        nb_file = NB_FILE
    print("COPY file FROM stdin;")
    # range() and `//` keep this working on Python 3 as well as Python 2.
    for did in range(0, nb_file * 2, 2):
        print("dddddddd-dddd-dddd-0000-%12.12d\tfile-%d.odt" % (did, did // 2))
    # Backslash-dot terminates the COPY data.
    print("\\.")
def printDDL():
    """Print the CREATE TABLE statements for hierarchy and dublincore."""
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("""CREATE TABLE hierarchy (
id character varying(36) NOT NULL,
parentid character varying(36),
pos integer,
name character varying,
isproperty boolean,
primarytype character varying(250),
ischeckedin boolean,
baseversionid character varying(36),
majorversion bigint,
minorversion bigint
);
CREATE TABLE dublincore (
id character varying(36) NOT NULL,
creator character varying,
source character varying,
created timestamp without time zone,
description character varying,
rights character varying,
"valid" timestamp without time zone,
format character varying,
issued timestamp without time zone,
modified timestamp without time zone,
"language" character varying,
expired timestamp without time zone,
coverage character varying,
title character varying
);
""")
def printDDL2():
    """Print the CREATE TABLE statements for content, uid, dc_contributors and file."""
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("""CREATE TABLE content (
id character varying(36) NOT NULL,
name character varying,
length bigint,
data character varying(40),
"encoding" character varying,
digest character varying,
"mime-type" character varying
);
CREATE TABLE uid (
id character varying(36) NOT NULL,
uid character varying
);
CREATE TABLE dc_contributors (
id character varying(36) NOT NULL,
pos integer,
item character varying
);
CREATE TABLE file (
id character varying(36) NOT NULL,
filename character varying
);
""")
# Write the dump to stdout. Each COPY block comes after the CREATE TABLE
# for its table: printDDL creates hierarchy and dublincore, printDDL2
# creates content, uid, dc_contributors and file. genMisc is not run.
for emit_step in (
    printDDL,
    genHierarchy,
    genDublincore,
    printDDL2,
    genContent,
    genUid,
    genContributor,
    genFile,
):
    emit_step()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment