Skip to content

Instantly share code, notes, and snippets.

@aVolpe
Created July 3, 2020 15:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aVolpe/1a7167404edb3f9f7d92cb1ee20b1a76 to your computer and use it in GitHub Desktop.
Save aVolpe/1a7167404edb3f9f7d92cb1ee20b1a76 to your computer and use it in GitHub Desktop.
Contraloria
#!/usr/bin/python3
import sys
import re
def get_file_data(file_name):
    """Split a PDF file name into document, name, year and version.

    The name is normalised (known typos patched, the ``.pdf`` suffix
    removed, ``-``, spaces and ``.`` turned into ``_``) and then split on
    ``_``.  The first token is the document identifier, the following
    non-numeric tokens form the name, the first all-digit token is the
    year, and any token after the year is the version (the last one wins).

    Args:
        file_name: Raw file name, typically one line of a listing file;
            a trailing newline is tolerated.

    Returns:
        A dict with the keys ``file_name`` (the input without newlines),
        ``document``, ``name``, ``year`` and ``version``.  All values are
        strings.  ``year`` defaults to ``'2016'`` and ``version`` to
        ``'1'`` when the corresponding token is missing.
    """
    # The first four replacements patch specific malformed names so that
    # they split into the expected tokens; they must run before the
    # generic separator normalisation below.
    cleaned = file_name \
        .replace('PERDOMO2016_1', 'PERDOMO_2016_1') \
        .replace('SOSARIELLA_216', 'SOSARIELLA_2016') \
        .replace('221.035', '221035') \
        .replace('991712_8', '991712#8') \
        .replace("_.pdf", "") \
        .replace(".pdf", "") \
        .strip() \
        .replace("\n", "") \
        .replace("-", "_") \
        .replace(" ", "_") \
        .replace(".", "_")
    parts = cleaned.split("_")

    document = parts[0]
    name_words = []
    year = '2016'
    version = '1'
    year_seen = False
    for part in parts[1:]:
        if year_seen:
            # Everything after the year is a version candidate.
            version = part
        elif part.isdigit():
            # The first numeric token ends the name and is the year.  It
            # must NOT also be recorded as the version, otherwise a file
            # without a version token would report its year as version.
            year_seen = True
            year = part
        else:
            name_words.append(part)

    # Known typo: a year written as "216" means 2016.
    if year == '216':
        year = '2016'

    return {
        'file_name': file_name.replace("\n", ""),
        'document': document,
        'name': ' '.join(name_words).strip(),
        'year': year,
        'version': version
    }
# Script body: read a listing of file names (one per line) from the path
# given as the first command-line argument, parse each with
# get_file_data() and print the resulting dict.
if len(sys.argv) < 2:
    raise Exception('Usage: python3 generate_rows.py FILE')

FILE_NAME = sys.argv[1]
print("Reading file: " + FILE_NAME)

with open(FILE_NAME, "r") as file:
    for line in file:
        # Names containing "(1)" or "(2)" are skipped -- presumably
        # duplicate copies of an already listed file; confirm with the
        # data source.
        if '(1)' in line or '(2)' in line:
            continue
        data = get_file_data(line)
        # Abort on the first row whose year or version did not parse to
        # something plausible, so bad names get fixed at the source.
        if not data['year'].isdigit() or int(data['year']) < 1990:
            raise Exception('invalid year: ' + data['year'])
        if not data['version'].isdigit():
            # The message previously said "invalid year" here, which
            # pointed at the wrong field.
            raise Exception('invalid version: ' + data['version'])
        print(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment