Skip to content

Instantly share code, notes, and snippets.

@n-kb
Created June 2, 2016 14:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save n-kb/5501491914bac4a472a4fe77d1a77186 to your computer and use it in GitHub Desktop.
Save n-kb/5501491914bac4a472a4fe77d1a77186 to your computer and use it in GitHub Desktop.
import xml.etree.cElementTree as ET
from os import listdir
from os.path import isfile, join
from datetime import datetime
from dateutil.relativedelta import relativedelta
# Registry of every sanctioned entity seen so far, keyed by its identifier,
# and the list of distinct programmes in first-seen order.
sanctioned = {}
programs = []

# Parses global file
# Every <sanctionEntity> read here gets an open-ended "date_to" (year 3000).
GLOBAL_FILE = "data/global.xml"
open_ended = datetime.strptime("3000-01-01", '%Y-%m-%d')
global_root = ET.ElementTree(file=GLOBAL_FILE).getroot()
for listed in global_root.iter("sanctionEntity"):
    # The start date is the publication date found under the first nameAlias.
    published = listed.find("nameAlias").find("regulationSummary").get("publicationDate")
    listed_since = datetime.strptime(published, '%Y-%m-%d')
    entity_id = listed.get("logicalId")
    regime = listed.find("regulation").get("programme")
    whole_name = listed.find("nameAlias").get("wholeName")
    if regime not in programs:
        programs.append(regime)
    sanctioned[entity_id] = {
        "date_from": listed_since,
        "date_to": open_ended,
        "id": entity_id,
        "programme": regime,
        "name": whole_name,
    }
# Parses other files
# Every other regular file in data/ is read as a dated file: the root element
# carries a "Date" attribute (dd/mm/YYYY) and the entries are <ENTITY> elements.
diff_files = [f for f in listdir("data/") if isfile(join("data/", f))]
# Placeholder start date for an entity that has only been seen once so far.
unknown_start = datetime.strptime("1000-01-01", '%Y-%m-%d')
for filename in diff_files:
    # listdir() returns bare file names, so the directory prefix must be put
    # back before comparing with GLOBAL_FILE; comparing the bare name never
    # matched and the global file was read a second time by this loop.
    if join("data/", filename) == GLOBAL_FILE:
        continue
    e = ET.ElementTree(file="data/" + filename).getroot()
    entities = list(e.iter("ENTITY"))
    if not entities:
        # Nothing to record; the "Date" attribute is only needed (and only
        # parsed) when the file actually contains entities.
        continue
    # The date belongs to the file, not to the entity: parse it once per file.
    file_date = datetime.strptime(e.get("Date"), '%d/%m/%Y')
    for entity in entities:
        entity_id = entity.get("Id")
        try:
            name = entity.find("NAME").find("WHOLENAME").text
        except AttributeError:
            # NAME or WHOLENAME element is missing.
            name = "None"
        programme = entity.get("programme")
        if programme not in programs:
            programs.append(programme)
        if entity_id not in sanctioned:
            # If the sanctioned is not in the latest batch, it means it was removed at some point
            sanctioned[entity_id] = {
                "date_to": file_date,
                "date_from": unknown_start,
                "id": entity_id,
                "programme": programme,
                "name": name,
            }
        elif sanctioned[entity_id]["date_from"] == unknown_start:
            # Second sighting of an entity that is absent from the global file.
            # listdir() order is arbitrary, so the two sightings may arrive in
            # either order: keep the earlier date as the start and the later
            # one as the end, otherwise the interval could be inverted and the
            # entity would never be counted.
            record = sanctioned[entity_id]
            earlier, later = sorted((record["date_to"], file_date))
            record["date_from"] = earlier
            record["date_to"] = later
# Produces the file of sanctions by month and programme
# Walks month by month from 2000-01-01 up to today. For each month, `outcome`
# receives one row: [month_start, count for programs[0], count for programs[1], ...].
date = datetime.strptime("2000-01-01", '%Y-%m-%d')
outcome = []
while date < datetime.today():
    # One counter per programme, plus "None" for entities without a programme.
    num_sanctioned = {"None": 0}
    for programme in programs:
        num_sanctioned[programme] = 0
    for entity in sanctioned.values():
        # An entity counts for this month when the month start lies strictly
        # inside its (date_from, date_to) interval.
        if entity["date_from"] < date < entity["date_to"]:
            if entity["programme"] is None:
                # Missing programmes are stored and counted as the string "None".
                entity["programme"] = "None"
            num_sanctioned[entity["programme"]] += 1
    line = [date]
    for programme in programs:
        # `programs` may hold the None object while the counts for it live
        # under the "None" string; map it so that column reports the real
        # count instead of always 0.
        key = "None" if programme is None else programme
        line.append(num_sanctioned[key])
    outcome.append(line)
    date = date + relativedelta(months=1)
print "date\t",
for programme in programs:
print str(programme)+"\t",
for line in outcome:
cell_count = 0
for cell in line:
if cell_count == 0:
print "\n" + line[0].strftime("%Y-%m") + "\t",
else:
print str(cell) + '\t',
cell_count += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment