-
-
Save n-kb/5501491914bac4a472a4fe77d1a77186 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xml.etree.cElementTree as ET | |
from os import listdir | |
from os.path import isfile, join | |
from datetime import datetime | |
from dateutil.relativedelta import relativedelta | |
# Every entity seen so far, keyed by its identifier. Each value is a dict with
# the keys "date_from", "date_to", "id", "programme" and "name".
sanctioned = {}
# Distinct programme values, in the order they are first encountered.
programs = []

# Parses global file
GLOBAL_FILE = "data/global.xml"
# Entities read from the global file get this far-future end date.
_open_ended = datetime.strptime("3000-01-01", '%Y-%m-%d')

global_root = ET.parse(GLOBAL_FILE).getroot()
for node in global_root.iter("sanctionEntity"):
    # The first <nameAlias> child supplies both the start date and the name.
    alias = node.find("nameAlias")
    published = alias.find("regulationSummary").get("publicationDate")
    listed_since = datetime.strptime(published, '%Y-%m-%d')

    entity_id = node.get("logicalId")
    programme = node.find("regulation").get("programme")
    whole_name = alias.get("wholeName")

    if programme not in programs:
        programs.append(programme)

    sanctioned[entity_id] = {
        "date_from": listed_since,
        "date_to": _open_ended,
        "id": entity_id,
        "programme": programme,
        "name": whole_name,
    }
# Parses other files
# Every regular file in data/ except GLOBAL_FILE is read as a dated snapshot:
# the root element's "Date" attribute (dd/mm/YYYY) dates the file and each
# <ENTITY> element describes one entity.
# NOTE(review): the first snapshot that mentions an unknown entity sets its
# "date_to" and the next one sets its "date_from", so the result depends on
# the order in which listdir() returns the files -- confirm that order is the
# intended one.
diff_files = [f for f in listdir("data/") if isfile(join("data/", f))]
# Sentinel start date meaning "start not determined yet".
unknown_start = datetime.strptime("1000-01-01", '%Y-%m-%d')
for filename in diff_files:
    path = join("data/", filename)
    # listdir() yields bare file names, so the directory has to be prepended
    # before comparing with GLOBAL_FILE; comparing the bare name never matched
    # and the global file was parsed a second time as a snapshot.
    if path == GLOBAL_FILE:
        continue
    root = ET.ElementTree(file=path).getroot()
    for entity in root.iter("ENTITY"):
        entity_id = entity.get("Id")
        # Read inside the loop so that a file without any <ENTITY> element
        # does not need a "Date" attribute.
        snapshot_date = datetime.strptime(root.get("Date"), '%d/%m/%Y')
        try:
            name = entity.find("NAME").find("WHOLENAME").text
        except AttributeError:
            # <NAME> or <WHOLENAME> is missing.
            name = "None"
        programme = entity.get("programme")
        if programme not in programs:
            programs.append(programme)
        if entity_id not in sanctioned:
            # If the sanctioned is not in the latest batch, it means it was removed at some point
            sanctioned[entity_id] = {
                "date_to": snapshot_date,
                "date_from": unknown_start,
                "id": entity_id,
                "programme": programme,
                "name": name,
            }
        elif sanctioned[entity_id]["date_from"] == unknown_start:
            # Sanctioned has been removed from GLOBAL, readded from the Archive ; now needs to find when it was added
            sanctioned[entity_id]["date_from"] = snapshot_date
# Produces the file of sanctions by month and programme
# Builds `outcome`: one row per month, starting at 2000-01-01 and stopping
# before the current date. Each row is [month_start, n_0, n_1, ...] where n_i
# is the number of entities of programs[i] whose interval
# (date_from, date_to) strictly contains month_start.
date = datetime.strptime("2000-01-01", '%Y-%m-%d')
outcome = []
today = datetime.today()
while date < today:
    num_sanctioned = dict.fromkeys(programs, 0)
    for entity in sanctioned.values():
        if entity["date_from"] < date < entity["date_to"]:
            # Tally under the programme value exactly as stored (possibly
            # None). Rewriting None to the string "None" sent those entities
            # to a key that is never read when the row is built, so their
            # column was always 0.
            key = entity["programme"]
            num_sanctioned[key] = num_sanctioned.get(key, 0) + 1
    outcome.append([date] + [num_sanctioned[programme] for programme in programs])
    date = date + relativedelta(months=1)
# Writes the table to standard output as tab-separated text: a header row
# ("date" followed by one column per entry of `programs`), then one row per
# entry of `outcome` with the month formatted as YYYY-MM. Every cell,
# including the last one of a row, is followed by a tab.
header = "date\t" + "".join(str(programme) + "\t" for programme in programs)
rows = [
    line[0].strftime("%Y-%m") + "\t" + "".join(str(cell) + "\t" for cell in line[1:])
    for line in outcome
]
# A single parenthesised argument is valid both as a Python 2 print statement
# and as a Python 3 print() call; it also terminates the output with a newline.
print("\n".join([header] + rows))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment