Skip to content

Instantly share code, notes, and snippets.

@epoz
Created October 19, 2018 11:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save epoz/3dd071f603e74c05658f54fc0abe2db6 to your computer and use it in GitHub Desktop.
Save epoz/3dd071f603e74c05658f54fc0abe2db6 to your computer and use it in GitHub Desktop.
Dumps a collection of dbtxt files to a single CSV file, with the text of each IC field expanded.
from __future__ import print_function
# Export details from a collection of dmp files found at a certain path to a csv file
import os
import iconclass
import sys
from progress.bar import Bar
import textbase
# The directory to scan is taken from the first command-line argument. Exit
# with a usage message rather than an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit('Usage: %s <path>' % sys.argv[0])
print('Reading files...')
# Recursively collect the full path of every file ending in .dmp.
files = []
for dirpath, dirnames, filenames in os.walk(sys.argv[1]):
    for filename in filenames:
        if filename.endswith('.dmp'):
            files.append(os.path.join(dirpath, filename))
# Parse every collected file and merge its records into ``data``, keyed by the
# first value of each record's ID field.
bar = Bar('Reading files', max=len(files))
data = {}
for x in files:
    bar.next()
    try:
        parsed = textbase.parse(x)
    except Exception:
        # Best effort: report the file that could not be parsed and carry on.
        # Catching Exception instead of using a bare ``except`` lets
        # KeyboardInterrupt and SystemExit stop the run.
        print('Problem parsing %s' % x)
        continue
    for xx in parsed:
        # Remember which file the record was read from.
        xx['FILENAME'] = [x]
        # Records sharing an ID are merged into one dict; fields of a later
        # record overwrite the same fields of an earlier one.
        data.setdefault(xx['ID'][0], {}).update(xx)
bar.finish()
# Build one output row per record that has both an IC and a URL.IMAGE field.
# Each row is a tuple: (first URL.IMAGE value, first ID value, first ID.INV
# value or '', '|'-joined "<code> <English text>" for every IC code).
# NOTE(review): nothing in the visible part of the script consumes ``clist``
# after it is built -- confirm whether a CSV-writing step is missing.
bar = Bar('Dumping rows', max=len(data))
clist = []
for obj in data.values():
    bar.next()
    if 'PARENT' in obj:
        # Copy the parent's fields onto this record (overwriting its own),
        # except the ones that identify or classify the record itself.
        for k, v in data.get(obj['PARENT'][0], {}).items():
            if k not in ('ID', 'IC', 'URL.IMAGE'):
                obj[k] = v
    # NOTE(review): the original indentation was lost; this assumes the check
    # below applies to every record, not only those with a PARENT -- confirm.
    if ('IC' in obj) and ('URL.IMAGE' in obj):
        try:
            ic_texts = '|'.join(
                ['%s %s' % (i, iconclass.get(i)['txt']['en']) for i in obj['IC']]
            )
            z = (
                obj['URL.IMAGE'][0],
                obj['ID'][0],
                obj.get('ID.INV', [''])[0],
                ic_texts,
            )
        except Exception:
            # Best effort: report the record and skip it. Catching Exception
            # instead of using a bare ``except`` lets KeyboardInterrupt and
            # SystemExit stop the run.
            print('Problem with %s' % obj)
            continue
        clist.append(z)
bar.finish()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment