Skip to content

Instantly share code, notes, and snippets.

@spaceCamel
Created May 14, 2010 13:56
Show Gist options
  • Save spaceCamel/401170 to your computer and use it in GitHub Desktop.
Save spaceCamel/401170 to your computer and use it in GitHub Desktop.
import re
from glob import glob
# Matches a ranking line such as "12  Some Company": group 1 is the numeric
# position, group 2 is the remainder of the line (the company name).
# Raw string so that "\d" and "\s" reach the regex engine as regex escapes
# instead of being treated as (invalid) string escapes.
nCo = re.compile(r"(\d+)\s+(.*)")
# Keys of each company record. The first two are taken from the ranking line;
# the remaining ones are filled from the lines that follow the ranking lines.
attributes = ['position', 'name', 'url', 'country', 'rate', 'field']
# Reference lists kept for convenience -- presumably every value that occurs
# in the input files; confirm against the data before relying on them.
#all_fields = ['Software', 'Telecommunications/Networking', 'Media/Entertainment', 'Internet', 'Biotech/Pharmaceutical/ Medical Equipment', 'Other', 'Greentech', 'internet', 'Semiconductor, Components and Electronics', 'Computers/Peripherals']
#all_countries = ['Turkey', 'United Kingdom', 'Belgium', 'Germany', 'Netherlands', 'Republic of Ireland', 'Poland', 'France', 'Norway', 'Israel', 'Northern Ireland', 'Sweden', 'Russia', 'Romania', 'Finland', 'Bulgaria', 'Hungary', 'Greece', 'Serbia', 'Slovakia', 'Denmark', 'Austria', 'Czech Republic', 'Portugal', 'Croatia', 'Estonia']
# Selection filters. Only the keys matter (the dicts are used as sets for
# membership tests); every value is None.
# NOTE(review): the reference list above also contains lowercase 'internet',
# which this filter does not match -- confirm whether that is intended.
my_fields = dict.fromkeys(['Software', 'Internet'])
my_countries = dict.fromkeys(['United Kingdom'])
# Output report. It is opened (and truncated) here, before any input is read,
# and is written and closed at the end of the script.
my_out_html = open("AAA_Fast500_UK_SW2.html", 'w')
# Accumulates the records parsed from every input file.
all_companies = []
def parse_fast500(lines, pattern, fields):
    """Parse one Fast500 listing into a list of company dicts.

    The expected layout is a run of ranking lines matched by *pattern*
    (group 1 becomes ``fields[0]``, group 2 becomes ``fields[1]``), followed
    by one column of values per remaining field, each column holding one
    line per company in ranking order.

    Args:
        lines: iterable of text lines (an open file works).
        pattern: compiled regex with two groups, matched against each
            stripped line.
        fields: record keys; the first two come from the ranking line, the
            rest are filled column by column.

    Returns:
        A list of dicts, one per ranking line, each carrying every key in
        *fields*. Values that are missing because the input ends early are
        set to ``""``.
    """
    rows = [raw.strip() for raw in lines]

    # Leading ranking lines: stop at the first line that is not "<n> <name>".
    companies = []
    for row in rows:
        found = pattern.match(row)
        if not found:
            break
        companies.append({fields[0]: found.group(1), fields[1]: found.group(2)})

    # Everything after the ranking lines is column data, one value per line.
    values = rows[len(companies):]
    count = len(companies)
    for column, attr in enumerate(fields[2:]):
        chunk = values[column * count:(column + 1) * count]
        for index, company in enumerate(companies):
            # A truncated file yields short columns; default to "" instead of
            # reusing an unrelated line or leaving the key undefined.
            company[attr] = chunk[index] if index < len(chunk) else ""
    return companies


for path in glob("Fast500_*.txt"):
    # The context manager closes each input file even if parsing fails.
    with open(path) as source:
        all_companies.extend(parse_fast500(source, nCo, attributes))
def _is_selected(company):
    """Return True when the record's field and country are both selected."""
    return company['field'] in my_fields and company['country'] in my_countries


# Keep only the companies that pass both filters, in their original order.
my_companies = list(filter(_is_selected, all_companies))
def format_str(att):
    """Return the %-style placeholder used to render attribute *att*.

    The result is meant to be interpolated with a mapping, e.g.
    ``format_str('name') % {'name': 'Acme'}``.

    Args:
        att: attribute name (a key of the company record).

    Returns:
        ``'%(att)s'`` for ordinary attributes. For ``'url'`` the placeholder
        is wrapped in Textile link markup (``"text":http://target``) so the
        value is rendered as a link to itself.
    """
    if att == 'url':
        return '"%({0})s":http://%({0})s'.format(att)
    return "%({0})s".format(att)
import textile

# Template for one Textile table row: "| %(position)s | %(name)s | ... |".
# It only depends on the attribute list, so it is built once rather than
# once per company.
row_template = "| %s |\n" % " | ".join(map(format_str, attributes))
# One rendered row per selected company.
txtmpl = [row_template % company for company in my_companies]
# Header row: "|_." marks a Textile header cell; labels are the capitalized
# attribute names.
txt_header = "|_. %s |" % " |_. ".join([a.capitalize() for a in attributes])
try:
    # Convert the Textile table to HTML and write it to the report file.
    my_out_html.write(textile.textile("%s\n%s" % (txt_header, "".join(txtmpl))))
finally:
    # Close the report even if rendering or writing fails.
    my_out_html.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment