Skip to content

Instantly share code, notes, and snippets.

@oubiga
Last active April 3, 2016 17:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oubiga/8ad20d9aab9670b17480a3b83b4b2231 to your computer and use it in GitHub Desktop.
Save oubiga/8ad20d9aab9670b17480a3b83b4b2231 to your computer and use it in GitHub Desktop.
Used in Stack Overflow
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Parse bc3 files and insert all the data into the database."""
import re
from enkendas.models import Version, Concept, Deco, Text
from .utils import optional_codes, parse_dates
# regex stuff
# parsers stuff
# Concepts collected while parsing, keyed by concept code.  parse_file reads
# the 'unit', 'summary', 'price', 'date' and 'concept_type' entries of each
# value, and treats a key ending in '##' as the root and in '#' as a chapter.
# NOTE(review): presumably filled by the record parsers, which are not shown
# in this file excerpt -- confirm.
concepts = {}
# Decompositions collected while parsing: parent concept code mapped to a
# list of (child code, factor, efficiency) tuples.  parse_file turns each
# tuple into one Deco row and removes the entry once it has been processed.
decos = {}
# Example of the expected content:
# decos = {'PER02': [('Qexcav', '1', '231.13'), ('Qzanj', '1', '34.5'),
# ('Qexcav2', '1', '19.07'), ('Qrelltras', '1', '19.07')],
# ...
# 'Qexcav': [('MMMT.3c', '1', '0.045'), ('O01OA070', '1', '0.054'),
# ('M07CB030', '1', '0.036'), ('%0300', '1', '0.03')]}
def dispatch_record(record):
    """
    Route one record to the parser that handles its type.

    The type is given by the first character of the record: 'D' goes to
    parse_decomp, 'V' to parse_version, 'C' to parse_concept and 'T' to
    parse_text.  Any other record, including an empty one, is ignored.
    """
    if not record.startswith(('D', 'V', 'C', 'T')):
        # Unknown or empty record: nothing to do.
        return

    kind = record[0]
    if kind == 'D':
        parse_decomp(record)
    elif kind == 'V':
        parse_version(record)
    elif kind == 'C':
        parse_concept(record)
    else:
        # Only 'T' is left after the guard above.
        parse_text(record)
def _iter_records(chunks):
    """
    Yield every complete record found in an iterable of byte chunks.

    Records are delimited by '~'.  The text that follows the last '~' of a
    chunk is carried over and prepended to the next chunk, so a record cut in
    two by a chunk boundary is yielded exactly once, in full.  Records are
    yielded stripped of surrounding whitespace and blank ones are dropped.
    """
    pending = ''
    for chunk in chunks:
        # ISO-8859-1 is a single-byte encoding, so decoding chunk by chunk
        # cannot split a character.  The chunk itself is deliberately not
        # stripped: whitespace at its edges may sit in the middle of a record
        # that continues in the next chunk.
        pieces = (pending + chunk.decode(encoding='ISO-8859-1')).split('~')
        # The last piece is only known to be complete once the next '~' (or
        # the end of the input) has been seen.
        pending = pieces.pop()
        for piece in pieces:
            record = piece.strip()
            if record:
                yield record
    # Whatever is left after the last chunk is the final record.
    record = pending.strip()
    if record:
        yield record


def _create_concepts():
    """
    Insert one Concept row per entry of the module-level ``concepts`` dict.

    A key ending in '##' is stored as the root concept and a key ending in
    '#' as a chapter; the marker is removed from the stored code.  A concept
    is flagged as parent when its code is a key of the module-level ``decos``.
    """
    # NOTE(review): ``concepts`` is module-level state and is not emptied
    # here, so a second call in the same process would insert the entries of
    # the previous file again -- confirm whether that is intended.
    concept_instances = []
    for key_code, data in concepts.items():
        code = key_code
        root = chapter = False
        if len(key_code) > 2 and key_code.endswith('##'):
            root = True
            code = key_code[:-2]
        elif len(key_code) > 1 and key_code.endswith('#'):
            chapter = True
            code = key_code[:-1]
        concept_instances.append(Concept(
            code=code, root=root, chapter=chapter, parent=code in decos,
            unit=data['unit'], summary=data['summary'],
            price=data['price'], date=data['date'],
            concept_type=data['concept_type']))
    Concept.objects.bulk_create(concept_instances)


def _create_decos():
    """
    Insert one Deco row per (child, factor, efficiency) tuple in ``decos``.

    Entries of the module-level ``decos`` dict are removed as soon as their
    parent concept has been processed.  A missing factor or efficiency is
    stored as 0.0 and tuples with an empty child code are skipped.
    """
    deco_instances = []
    # Child concepts already fetched, to avoid extra queries.
    children_by_code = {}
    for concept in Concept.objects.all():
        if concept.parent is False:
            continue
        # NOTE(review): this iterates over every stored Concept, so a parent
        # whose code is not in ``decos`` raises KeyError here, and a child
        # code that is missing or duplicated in the table makes get() raise
        # -- confirm that this is the wanted failure mode.
        for child, factor, efficiency in decos[concept.code]:
            if child == '':
                continue
            if child not in children_by_code:
                children_by_code[child] = Concept.objects.get(code=child)
            deco_instances.append(Deco(
                parent_concept=concept, concept=children_by_code[child],
                factor=float(factor or '0.000'),
                efficiency=float(efficiency or '0.000')))
        decos.pop(concept.code, None)
    Deco.objects.bulk_create(deco_instances)


def parse_file(file):
    """
    Parse the whole file and store its concepts and decompositions.

    file is a generator returned by file.chunks(chunk_size=80000) in views.py;
    any iterable of byte strings is accepted.

    Every record of the file is sent to dispatch_record().  Afterwards the
    content of the module-level ``concepts`` and ``decos`` dicts (presumably
    filled by the record parsers) is written to the database with one
    bulk_create call per model.

    Records that span two chunks are reassembled before being dispatched, and
    the last record of every chunk is dispatched as well.
    """
    for record in _iter_records(file):
        # TODO: I didn't create a regex for 'M' and 'E' records yet.
        if record.startswith(('M', 'E')):
            continue
        dispatch_record(record)

    # Concepts must exist before the decompositions that reference them.
    _create_concepts()
    _create_decos()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment