Skip to content

Instantly share code, notes, and snippets.

@theiostream
Created December 7, 2019 02:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save theiostream/53d191fa7ee258a43fbcc7fb0567b7b2 to your computer and use it in GitHub Desktop.
Save theiostream/53d191fa7ee258a43fbcc7fb0567b7b2 to your computer and use it in GitHub Desktop.
Tentatively converts IdeaList .TEX file to CSV
#!/usr/bin/python
# converts Blackwell IdeaList .TEX file to CSV
# (c) 2019 Daniel Ferreira
from array import array
from datetime import date
import csv
# Kinds of field payload, selected by the two marker bytes that open each
# field spec in the input file.
FIELD_TYPE_STRING, FIELD_TYPE_DATE = range(2)

# States of the parser's state machine.
STATE_START, STATE_READING_FIELD_SPEC, STATE_READING_FIELD_DATA = range(3)

# Maps the hex form of the 2-byte tag stored right after each field's data
# to the column name used for that field in the CSV output.
NICE_KEY = {
    '8506': 'name',
    '0b7f': 'title',
    'c903': 'christname',
    '2103': 'date',
    'd619': 'type',
    'ab3d': 'subject',
    '0203': 'bill_name',
    '0433': 'hoppit_subj',
    '840c': 'question',
    'a807': 'procon',
    '0000': 'other',
}
def round_down(n, div):
    """Return n with its remainder modulo div removed.

    For a positive div this is the largest multiple of div that does not
    exceed n, e.g. round_down(0x45, 0x20) == 0x40.
    """
    return n - (n % div)
# The date format here is weird.
def parse_date(dd):
    """Decode a packed date field and return it as a 'YYYY-MM-DD' string.

    Only dd[2] and dd[3] are read; dd[0] and dd[1] are ignored. The two
    bytes are unpacked as follows:

    * day   -- the low 5 bits of dd[3] (dd[3] % 0x20)
    * month -- the high 3 bits of dd[3] (dd[3] // 0x20), plus 8 when dd[2]
               is odd
    * year  -- 1690 + (dd[2] - 0x34) // 2

    dd is any indexable sequence of integer byte values (bytes, a list, an
    array('B') slice, ...).

    Raises IndexError if dd has fewer than 4 items, and ValueError if the
    decoded month/day do not form a valid calendar date.
    """
    date_byte1 = dd[2]
    date_byte2 = dd[3]

    # Floor division keeps the arithmetic in integers; the low bit of
    # date_byte1 is not part of the year, it extends the month below.
    year = (date_byte1 - 0x34) // 2 + 1690

    # Upper three bits -> month (low part), lower five bits -> day.
    month, day = divmod(date_byte2, 0x20)
    if date_byte1 % 2 != 0:
        month += 8

    # isoformat() always yields zero-padded YYYY-MM-DD, independent of the
    # platform's strftime implementation.
    return date(year, month, day).isoformat()
# --- Script body: parse JOURNALS.TEX into records and dump them as CSV ---
#
# The input is walked with a three-state machine:
#   STATE_START               scan forward one byte at a time for a record
#                             header (recognised by 0xab at offset 8)
#   STATE_READING_FIELD_SPEC  read a 4-byte field spec: two type-marker
#                             bytes, then the payload length at offset 2
#   STATE_READING_FIELD_DATA  read the payload, the 2-byte tag that follows
#                             it, and the byte telling whether the record
#                             ends here
with open('JOURNALS.TEX', 'rb') as f:
    data = array('B', f.read())

final_len = len(data)
output = []      # finished records, one dict per record
cum_obj = {}     # record currently being accumulated
state = STATE_START
idx = 0
field_type = FIELD_TYPE_STRING
field_len = 0

while idx < final_len:
    if state == STATE_START:
        # The header test looks 8 bytes ahead; once that offset falls past
        # the end of the file no further record can start, so stop instead
        # of indexing out of range on trailing bytes.
        if idx + 8 >= final_len:
            break
        if data[idx+8] != 0xab:
            idx += 1
            continue

        # The first three header bytes are kept, hex-encoded, as the id.
        cum_obj['id'] = bytes(data[idx:idx+3]).hex()
        state = STATE_READING_FIELD_SPEC
        idx += 10
    elif state == STATE_READING_FIELD_SPEC:
        if data[idx] == 0x57 and data[idx+1] == 0x20:
            field_type = FIELD_TYPE_STRING
        elif data[idx] == 0x44 and data[idx+1] == 0x01:
            field_type = FIELD_TYPE_DATE
        else:
            raise Exception('bad field type')

        field_len = data[idx+2]
        state = STATE_READING_FIELD_DATA
        idx += 4
    elif state == STATE_READING_FIELD_DATA:
        # The 2-byte tag naming the field is stored *after* its payload.
        key = NICE_KEY[bytes(data[idx+field_len:idx+field_len+2]).hex()]

        if field_type == FIELD_TYPE_STRING:
            # NOTE(review): undecodable bytes are silently dropped; confirm
            # that the source text really is UTF-8/ASCII.
            cum_obj[key] = bytes(data[idx:idx+field_len]).decode('utf-8', errors = 'ignore')
        elif field_type == FIELD_TYPE_DATE:
            cum_obj[key] = parse_date(data[idx:idx+field_len])
        else:
            raise Exception('Bad field type')

        # The byte after the tag decides what follows: 0x00 closes the
        # record (13 more bytes are skipped before scanning for the next
        # header); anything else means another field spec comes next.
        if data[idx+field_len+2] == 0x00:
            output.append(cum_obj)
            cum_obj = {}
            state = STATE_START
            idx += field_len + 2 + 13
        else:
            state = STATE_READING_FIELD_SPEC
            idx += field_len + 2

# Records need not all carry the same fields, so the header is the union of
# every record's keys in first-seen order (identical to the first record's
# keys when all records share one layout). Fields a record lacks are written
# as empty cells by DictWriter.
keys = list(dict.fromkeys(key for record in output for key in record))

# newline='' is what the csv module requires of the file object (otherwise
# rows end in \r\r\n on Windows); UTF-8 matches how the strings were decoded.
with open('output.csv', 'w', newline='', encoding='utf-8') as oupf:
    dict_writer = csv.DictWriter(oupf, keys)
    # With no records there is no header to write; leave the file empty.
    if output:
        dict_writer.writeheader()
        dict_writer.writerows(output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment