Last active
April 3, 2016 17:18
-
-
Save oubiga/8ad20d9aab9670b17480a3b83b4b2231 to your computer and use it in GitHub Desktop.
Used in Stack Overflow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Parse bc3 files and insert all their data into the database."""
import re | |
from enkendas.models import Version, Concept, Deco, Text | |
from .utils import optional_codes, parse_dates | |
# regex stuff | |
# parsers stuff | |
# Module-level accumulator read by parse_file() once every record has been
# dispatched. Keys are concept codes; a trailing '##' marks the root concept
# and a trailing '#' marks a chapter. Each value is a dict that provides the
# 'unit', 'summary', 'price', 'date' and 'concept_type' entries.
# NOTE(review): presumably filled in by parse_concept() -- confirm.
concepts = {}
# Module-level accumulator read by parse_file(): maps a parent concept code
# to the list of (child_code, factor, efficiency) string tuples that make up
# its decomposition.
# NOTE(review): presumably filled in by parse_decomp() -- confirm.
decos = {}
# Example of the expected content:
# decos = {'PER02': [('Qexcav', '1', '231.13'), ('Qzanj', '1', '34.5'),
#                    ('Qexcav2', '1', '19.07'), ('Qrelltras', '1', '19.07')],
#          ...
#          'Qexcav': [('MMMT.3c', '1', '0.045'), ('O01OA070', '1', '0.054'),
#                     ('M07CB030', '1', '0.036'), ('%0300', '1', '0.03')]}
def dispatch_record(record):
    """
    Route a single record to the parser that handles its type.

    The type is given by the first character of the record: 'D' goes to
    parse_decomp, 'V' to parse_version, 'C' to parse_concept and 'T' to
    parse_text. A record starting with anything else (or an empty record)
    is ignored. Nothing is returned.
    """
    if record.startswith('D'):
        parse_decomp(record)
        return
    if record.startswith('V'):
        parse_version(record)
        return
    if record.startswith('C'):
        parse_concept(record)
        return
    if record.startswith('T'):
        parse_text(record)
        return
    # Any other record type is deliberately left unhandled.
def _iter_records(chunks):
    """Yield every non-empty, stripped record found in an iterable of byte chunks."""
    # Text after the last '~' seen so far; it may be continued by the next
    # chunk, so it is carried over instead of being treated as complete.
    pending = ''
    for chunk in chunks:
        # ISO-8859-1 is a single-byte encoding, so decoding chunk by chunk
        # can never split a character. The chunk itself is not stripped:
        # whitespace at a chunk boundary may belong to the middle of a record.
        pieces = (pending + chunk.decode(encoding='ISO-8859-1')).split('~')
        pending = pieces.pop()
        for piece in pieces:
            text = piece.strip()
            if text:
                yield text
    # The last record of the file has no '~' after it.
    tail = pending.strip()
    if tail:
        yield tail


def _save_concepts():
    """Bulk-insert one Concept row for every entry of the module-level ``concepts``."""
    concept_instances = []
    for key_code, data in concepts.items():
        # 'CODE##' marks the root concept and 'CODE#' a chapter; the marker
        # is removed from the stored code. A key made only of the marker
        # itself is not treated as marked.
        root = len(key_code) > 2 and key_code.endswith('##')
        chapter = not root and len(key_code) > 1 and key_code.endswith('#')
        if root:
            code = key_code[:-2]
        elif chapter:
            code = key_code[:-1]
        else:
            code = key_code
        concept_instances.append(
            Concept(code=code, root=root, chapter=chapter,
                    # A concept is a parent when it has a decomposition.
                    parent=code in decos,
                    unit=data['unit'], summary=data['summary'],
                    price=data['price'], date=data['date'],
                    concept_type=data['concept_type']))
    Concept.objects.bulk_create(concept_instances)


def _save_decos():
    """Bulk-insert the Deco rows linking each parent Concept to its children."""
    deco_instances = []
    # Child Concept objects already fetched, keyed by code, to avoid
    # repeating the same query.
    children_by_code = {}
    for concept in Concept.objects.all():
        if not concept.parent:
            continue
        # Concept.objects.all() can return rows whose code has no entry in
        # ``decos`` (for example a code that was already consumed); those
        # are skipped instead of raising KeyError.
        children = decos.pop(concept.code, None)
        if children is None:
            continue
        for child, factor, efficiency in children:
            if child == '':
                continue
            if child not in children_by_code:
                # NOTE(review): raises if no Concept (or more than one) has
                # this code -- confirm that is the wanted behaviour.
                children_by_code[child] = Concept.objects.get(code=child)
            deco_instances.append(
                Deco(parent_concept=concept,
                     concept=children_by_code[child],
                     # Missing numeric fields count as zero.
                     factor=float(factor) if factor != '' else 0.0,
                     efficiency=float(efficiency) if efficiency != '' else 0.0))
    Deco.objects.bulk_create(deco_instances)


def parse_file(file):
    """
    Parse the whole file and store its concepts and decompositions.

    file is an iterable of byte chunks, e.g. the generator returned by
    file.chunks(chunk_size=80000) in views.py. The chunks are decoded as
    ISO-8859-1 and split into records on '~'; a record that spans two
    chunks is reassembled before being dispatched.

    Every record is passed to dispatch_record(), except 'M' and 'E'
    records, which are skipped. Afterwards the module-level ``concepts``
    and ``decos`` dictionaries are written to the database with
    bulk_create. Nothing is returned.

    NOTE(review): ``concepts`` is not cleared here, so entries left from a
    previous call would be inserted again -- confirm whether the caller
    resets it.
    """
    for record in _iter_records(file):
        # TODO: I didn't create a regex for 'M' and 'E' records yet.
        if record.startswith(('M', 'E')):
            continue
        dispatch_record(record)

    _save_concepts()
    _save_decos()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment