Created
October 19, 2018 11:42
-
-
Save epoz/3dd071f603e74c05658f54fc0abe2db6 to your computer and use it in GitHub Desktop.
Dumps a collection of dbtxt (.dmp) files to a single CSV file, expanding each IC (Iconclass) code to include its text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
# Export details from a collection of dmp files found at a certain path to a csv file | |
import os | |
import iconclass | |
import sys | |
from progress.bar import Bar | |
import textbase | |
# Walk the directory tree rooted at the first command-line argument and
# collect the full path of every file whose name ends in '.dmp'.
print('Reading files...')
files = []
for dirpath, dirnames, filenames in os.walk(sys.argv[1]):
    files.extend(
        os.path.join(dirpath, name)
        for name in filenames
        if name.endswith('.dmp')
    )
# Parse every collected file and merge its records into ``data``, keyed by
# the first value of each record's ID field.
bar = Bar('Reading files', max=len(files))
data = {}
for x in files:
    bar.next()
    try:
        parsed = textbase.parse(x)
    except Exception:
        # Best effort: report the unreadable file and carry on with the rest.
        # Catching Exception rather than using a bare except means Ctrl-C
        # (KeyboardInterrupt) and SystemExit still stop the run.
        print('Problem parsing %s' % x)
        continue
    for xx in parsed:
        # Remember which file this record came from.
        xx['FILENAME'] = [x]
        # Records that share an ID are merged into one dict; fields from a
        # later record overwrite same-named fields from an earlier one.
        data.setdefault(xx['ID'][0], {}).update(xx)
bar.finish()
# Build one row tuple per record that has both an IC and a URL.IMAGE field:
# (image URL, ID, ID.INV or '', '|'-joined "<code> <English text>" entries).
bar = Bar('Dumping rows', max=len(data))
clist = []
for obj in data.values():
    bar.next()
    if 'PARENT' in obj:
        # Copy the parent record's fields onto this record (overwriting any
        # same-named field), except ID, IC and URL.IMAGE, which stay the
        # record's own. An unknown parent ID contributes nothing.
        for k, v in data.get(obj['PARENT'][0], {}).items():
            if k not in ('ID', 'IC', 'URL.IMAGE'):
                obj[k] = v
    if ('IC' in obj) and ('URL.IMAGE' in obj):
        try:
            z = (
                obj['URL.IMAGE'][0],
                obj['ID'][0],
                obj.get('ID.INV', [''])[0],
                '|'.join(
                    ['%s %s' % (i, iconclass.get(i)['txt']['en'])
                     for i in obj['IC']]
                ),
            )
        except Exception:
            # Best effort: report the record that could not be turned into a
            # row and skip it. Catching Exception rather than using a bare
            # except keeps Ctrl-C (KeyboardInterrupt) and SystemExit working.
            print('Problem with %s' % obj)
            continue
        clist.append(z)
bar.finish()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment