Skip to content

Instantly share code, notes, and snippets.

@Vanuan Vanuan/age_scraper.py Secret
Last active Sep 9, 2016

Embed
What would you like to do?
#encoding=utf-8
import requests
import re
# Scrapes per-region population-by-age series from lv.ukrstat.gov.ua and
# writes them to a flat CSV file.  Written to run on both Python 2 and 3.

BASE_URL = 'http://www.lv.ukrstat.gov.ua/dem/SWF/dani/age/'
OUTPUT_PATH = 'age_stats.csv'
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests may block forever

# Column titles (Ukrainian): region KOATUU code, region, sex, age, year, count.
CSV_HEADER = (u'КОАТУУ регіону', u'регіон', u'стать', u'вік', u'рік', u'кількість')

# Raw strings keep the backslash escapes intact for the regex engine.
REGION_RE = re.compile(r'<option value="(.*?)"(?: selected )?>(.*?)</option>')
YEARS_RE = re.compile(r'var currYear = \[(.*?)\];')
AGE_RE = re.compile(r'stat([a-z]+)(\d\d?\d?)=\[(.*?)\];')


def fetch_lines(url):
    """Download *url* and return its body as a list of text lines.

    The body is decoded as cp1251.  Raises ``requests.HTTPError`` on a
    non-success status so an error page is never parsed as data.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = 'cp1251'
    return response.text.splitlines()


def parse_regions(lines):
    """Return ``(value, label)`` pairs for every ``<option>`` tag in *lines*.

    Every tag on a line is collected, not only the first one, so markup that
    puts several options on one line is handled as well.
    """
    regions = []
    for line in lines:
        for match in REGION_RE.finditer(line):
            regions.append(match.groups())
    return regions


def parse_years(lines):
    """Return the year labels from the ``var currYear = [...]`` line.

    Single quotes and surrounding whitespace are removed from each label.
    If several lines match, the last one wins.  Returns ``None`` when no
    line matches.
    """
    years = None
    for line in lines:
        match = YEARS_RE.search(line)
        if match:
            years = [label.replace("'", '').strip()
                     for label in match.group(1).split(',')]
    return years


def parse_region_rows(lines):
    """Return ``(sex, age, year, count)`` tuples parsed from one region page.

    Each ``stat<sex><age>=[...];`` array is paired positionally with the
    year labels of the same page.  The first array element is discarded
    (presumably a label -- not verifiable from this script alone).

    Raises ``ValueError`` when a series is present but the page has no year
    labels, or when a series holds more values than there are years; both
    cases would otherwise produce mislabelled rows.
    """
    years = parse_years(lines)
    rows = []
    for line in lines:
        for match in AGE_RE.finditer(line):
            sex, age, raw_counts = match.groups()
            if years is None:
                raise ValueError(
                    "found series stat%s%s but no 'var currYear' line" % (sex, age))
            counts = [value.strip()
                      for value in raw_counts.replace('"', '').split(',')[1:]]
            if len(counts) > len(years):
                raise ValueError(
                    'series stat%s%s has %d values but only %d years'
                    % (sex, age, len(counts), len(years)))
            for year, count in zip(years, counts):
                rows.append((sex, age, year, count))
    return rows


def csv_line(fields):
    """Return *fields* joined into one CSV line terminated by a newline.

    A field containing a comma, a double quote or a line break is wrapped in
    double quotes with embedded quotes doubled; other fields are written
    unchanged.
    """
    cells = []
    for field in fields:
        if any(ch in field for ch in u',"\r\n'):
            field = u'"' + field.replace(u'"', u'""') + u'"'
        cells.append(field)
    return u','.join(cells) + u'\n'


def write_csv(path, rows):
    """Write the header and *rows* to *path* as UTF-8 encoded CSV."""
    import io  # local import: io.open accepts an encoding on Python 2 and 3

    with io.open(path, 'w', encoding='utf-8') as out:
        out.write(csv_line(CSV_HEADER))
        for row in rows:
            out.write(csv_line(row))


def main():
    """Scrape every listed region and write the combined table to disk."""
    import sys

    rows = []
    for id_, name in parse_regions(fetch_lines(BASE_URL + 'age1.php')):
        print(name)
        try:
            region_lines = fetch_lines('%s%s.svg' % (BASE_URL, id_))
        except requests.HTTPError as exc:
            # An option without a data file yields no rows, but the skip is
            # reported instead of silently parsing the error page.
            sys.stderr.write('skipping region %s: %s\n' % (id_, exc))
            continue
        for sex, age, year, count in parse_region_rows(region_lines):
            rows.append((id_, name.strip(), sex, age, year, count))
    write_csv(OUTPUT_PATH, rows)


if __name__ == '__main__':
    main()
@iakoff

This comment has been minimized.

Copy link

commented Sep 9, 2016

И че ? работает скрипт ?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.