-
-
Save Vanuan/a4cbc66c830d60d7707d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#encoding=utf-8
import csv
import re
import sys

import requests
# Scrape per-region age statistics from lv.ukrstat.gov.ua into age_stats.csv.
#
# The index page lists regions as <option> elements; each region has an SVG
# file whose embedded script holds the list of years ("var currYear = [...]")
# and one "stat<sex><age>=[...]" array per sex/age pair.
#
# Written for Python 3.

INDEX_URL = 'http://www.lv.ukrstat.gov.ua/dem/SWF/dani/age/age1.php'
REGION_URL_TEMPLATE = 'http://www.lv.ukrstat.gov.ua/dem/SWF/dani/age/%s.svg'
PAGE_ENCODING = 'cp1251'
OUTPUT_PATH = 'age_stats.csv'
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30

CSV_HEADER = ['КОАТУУ регіону', 'регіон', 'стать', 'вік', 'рік', 'кількість']

REGION_RE = re.compile(r'<option value="(.*?)"(?: selected )?>(.*?)</option>')
YEARS_RE = re.compile(r'var currYear = \[(.*?)\];')
AGE_RE = re.compile(r'stat([a-z]+)(\d\d?\d?)=\[(.*?)\];')


def fetch_text(url: str) -> str:
    """Download *url* and return its body decoded as cp1251.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = PAGE_ENCODING
    return response.text


def parse_regions(html: str) -> list:
    """Return ``(region_id, region_name)`` tuples for every <option> in *html*.

    Names are returned exactly as they appear in the markup (not stripped).
    """
    regions = []
    for line in html.splitlines():
        # finditer, not search: several options may share one line.
        regions.extend(match.groups() for match in REGION_RE.finditer(line))
    return regions


def parse_years(svg_text: str) -> list:
    """Return the year labels from the ``var currYear = [...]`` statement.

    If the statement occurs on several lines the last one wins; if it is
    absent an empty list is returned.
    """
    years = []
    for line in svg_text.splitlines():
        match = YEARS_RE.search(line)
        if match:
            years = [
                item.replace("'", '').strip()
                for item in match.group(1).split(',')
            ]
    return years


def parse_region_rows(region_id: str, region_name: str, svg_text: str) -> list:
    """Return CSV rows ``(id, name, sex, age, year, count)`` for one region.

    Args:
        region_id: value of the region's <option>, copied into every row.
        region_name: display name; surrounding whitespace is stripped.
        svg_text: decoded contents of the region's SVG file.

    Raises:
        ValueError: if a stat array holds more counts than there are years,
            since the extra counts could not be labelled with a year.
    """
    years = parse_years(svg_text)
    name = region_name.strip()
    rows = []
    for line in svg_text.splitlines():
        for match in AGE_RE.finditer(line):
            sex, age, raw_counts = match.groups()
            # The first array element is skipped, as in the original script
            # (presumably a label rather than a count -- confirm against data).
            counts = [
                item.strip()
                for item in raw_counts.replace('"', '').split(',')[1:]
            ]
            if len(counts) > len(years):
                raise ValueError(
                    'region %s, stat%s%s: %d counts but only %d years'
                    % (region_id, sex, age, len(counts), len(years))
                )
            rows.extend(
                (region_id, name, sex, age, year, count)
                for year, count in zip(years, counts)
            )
    return rows


def write_csv(path: str, rows: list) -> None:
    """Write the header and *rows* to *path* as UTF-8 CSV with LF line ends.

    Fields containing commas, quotes or newlines are quoted by the csv
    module, so such region names no longer corrupt the column layout.
    """
    with open(path, 'w', encoding='utf-8', newline='') as output:
        writer = csv.writer(output, lineterminator='\n')
        writer.writerow(CSV_HEADER)
        writer.writerows(rows)


def main() -> None:
    """Scrape every region listed on the index page and write age_stats.csv."""
    rows = []
    for region_id, region_name in parse_regions(fetch_text(INDEX_URL)):
        print(region_name)
        try:
            svg_text = fetch_text(REGION_URL_TEMPLATE % region_id)
        except requests.HTTPError as error:
            # An <option> without a matching SVG file should not abort the
            # whole run; report it and move on to the next region.
            sys.stderr.write('skipping region %s: %s\n' % (region_id, error))
            continue
        rows.extend(parse_region_rows(region_id, region_name, svg_text))
    write_csv(OUTPUT_PATH, rows)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
И че ? работает скрипт ?