Skip to content

Instantly share code, notes, and snippets.

@ansakoy
Created January 19, 2016 12:46
Show Gist options
  • Save ansakoy/4f504239cbb5ab34262f to your computer and use it in GitHub Desktop.
Save ansakoy/4f504239cbb5ab34262f to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import requests
import time
import csv
# Product classifier codes to collect, mapped to their human-readable
# (Russian) descriptions; all five entries describe toilet paper or its
# production.  Each key is interpolated verbatim into the search URL and
# compared with ``==`` against the codes found on products, so a key must not
# carry stray whitespace (the original '21.22.99.000 ' entry had a trailing
# space and therefore could never match a product).
okpd_okdp = {
    '2109365': 'Бумага туалетная',
    '21.22.99.000': 'Услуги по производству хозяйственной и туалетной бумаги и бумажных изделий',
    '21.22.11.110': 'Бумага туалетная',
    '21.22.11.111': 'Бумага туалетная с массой 1 кв. м каждого слоя не более 25 г',
    '21.22.11.119': 'Бумага туалетная с массой 1 кв. м каждого слоя более 25 г',
}
def _utf8_field(record, key, default):
    """Return ``record[key]`` as a UTF-8 byte string.

    ``default`` is used when the key is missing or its value is null.
    Non-text values (e.g. numbers) are converted to text first.
    """
    value = record.get(key)
    if value is None:
        value = default
    if isinstance(value, bytes):
        return value
    if not isinstance(value, type(u'')):
        value = u'%s' % (value,)
    return value.encode('utf-8')


def make_dict(target_contract, target_product, target_code):
    """Flatten one contract/product pair into a single row dict.

    Every text value is UTF-8 encoded, including the OKEI unit fields, which
    the previous version left as raw unicode.

    Args:
        target_contract: contract record (dict).  ``regNum``, ``customer``
            and ``regionCode`` are required; ``suppliers`` may be missing or
            empty, in which case the supplier columns fall back to '0'.
        target_product: product record (dict) taken from the contract.
        target_code: classifier code the product matched; must be a key of
            the module-level ``okpd_okdp`` mapping.

    Returns:
        dict with the keys ``code_meaning``, ``okdp_okpd``,
        ``product_description``, ``okei_code``, ``okei_text``, ``reg_num``,
        ``sign_date``, ``customer_name``, ``customer_kpp``, ``customer_inn``,
        ``supplier_name``, ``supplier_kpp``, ``supplier_inn``, ``region``,
        ``price``, ``link`` and ``fz``.

    Raises:
        KeyError: if a required contract key is missing or ``target_code`` is
            not in ``okpd_okdp``.
    """
    reg_num = target_contract[u'regNum'].encode('utf-8')
    customer = target_contract[u'customer']

    # Only the first supplier is exported; tolerate contracts without any.
    suppliers = target_contract.get(u'suppliers') or []
    if len(suppliers) > 1:
        print('multiple suppliers: %s' % target_contract.get(u'regNum', '?'))
    supplier = suppliers[0] if suppliers else {}

    # A missing or null OKEI entry yields '-' for both unit columns.
    okei = target_product.get(u'OKEI') or {}

    return {
        'code_meaning': okpd_okdp[target_code],
        'okdp_okpd': target_code,
        'product_description': _utf8_field(target_product, u'name', u'-'),
        'okei_code': _utf8_field(okei, u'code', u'-'),
        'okei_text': _utf8_field(okei, u'name', u'-'),
        'reg_num': reg_num,
        'sign_date': _utf8_field(target_contract, u'signDate', u'-'),
        'customer_name': _utf8_field(customer, u'fullName', u'-'),
        'customer_kpp': _utf8_field(customer, u'kpp', u'0'),
        'customer_inn': _utf8_field(customer, u'inn', u'0'),
        'supplier_name': _utf8_field(supplier, u'organizationName', u'0'),
        'supplier_kpp': _utf8_field(supplier, u'kpp', u'0'),
        'supplier_inn': _utf8_field(supplier, u'inn', u'0'),
        'region': target_contract[u'regionCode'].encode('utf-8'),
        # A null or missing sum counts as 0 instead of raising in float().
        'price': float(target_product.get(u'sum') or 0),
        'link': 'http://clearspending.ru/contract/{}'.format(reg_num),
        'fz': _utf8_field(target_contract, u'fz', u'?'),
    }
# Seconds to wait for the API before giving up on a single request, so one
# stalled connection cannot hang the whole run.
_REQUEST_TIMEOUT = 60


def _product_has_code(product, code):
    """Return True when the product's OKDP or OKPD entry carries ``code``."""
    for field in (u'OKDP', u'OKPD'):
        classifier = product.get(field)
        if classifier and classifier.get(u'code', 0) == code:
            return True
    return False


def extract_values(lst, target_region, code):
    """Append to ``lst`` one row per product matching ``code`` in a region.

    Queries the clearspending.ru contract search API for the date range
    01.01.2015-31.12.2015, the given customer region and classifier code,
    walks every result page (50 contracts per page) and appends
    ``make_dict(contract, product, code)`` for each product whose OKDP or
    OKPD code equals ``code``.

    The function is best-effort: request or parsing failures are printed and
    end processing of this region/code pair, and a contract that cannot be
    flattened is reported and skipped without dropping the remaining ones.

    Args:
        lst: list that collects the row dicts; mutated in place.
        target_region: customer region code placed into the query.
        code: classifier code to search for and to match products against.

    Returns:
        The same ``lst`` object, on success and on failure alike.
    """
    api = ('http://openapi.clearspending.ru/restapi/v3/contracts/search/'
           '?daterange=01.01.2015-31.12.2015&customerregion={}&okdp_okpd={}'
           ).format(target_region, code)
    raw = None
    try:
        # The unpaged response is used only for the total; every page,
        # including the first, is then requested explicitly.
        raw = requests.get(api, timeout=_REQUEST_TIMEOUT)
        data = raw.json()
        num_of_contracts = data[u'contracts'][u'total']
        if num_of_contracts == 500:
            # Presumably the API caps a search at 500 results, so hitting
            # exactly 500 suggests truncation -- TODO confirm against the API.
            print('too many contracts %s' % api)
        # Ceiling division: number of 50-contract pages needed.
        num_of_pages = num_of_contracts // 50 + (num_of_contracts % 50 != 0)
        for page in range(1, num_of_pages + 1):
            page_url = '{}&page={}'.format(api, page)
            data = requests.get(page_url, timeout=_REQUEST_TIMEOUT).json()
            for contract in data[u'contracts'][u'data']:
                products = contract.get(u'products')
                if not products:
                    print('No products %s' % contract.get(u'regNum', api))
                    continue
                for product in products:
                    # Check both classifiers: a product may carry an OKDP
                    # entry that does not match alongside an OKPD that does.
                    if not _product_has_code(product, code):
                        continue
                    try:
                        lst.append(make_dict(contract, product, code))
                    except (KeyError, IndexError, AttributeError,
                            TypeError, ValueError) as error:
                        # One malformed contract must not discard the rest.
                        print('error: %s contract: %s'
                              % (error, contract.get(u'regNum', '?')))
    except Exception as error:
        # Top-level boundary for this region/code pair: report and move on.
        status = getattr(raw, 'status_code', None)
        print('error: %s status code: %s api: %s' % (error, status, api))
    return lst
# Script body: gather rows for every (classifier code, region) pair, write
# them to paper2015.csv and report the total running time.
values_list = list()
start = time.time()
for code in okpd_okdp:
    # Region codes 1..99 are all tried; extract_values reports failures
    # itself and simply adds nothing for regions without results.
    for region in range(1, 100):
        extract_values(values_list, region, code)

# Columns written to the CSV, in output order.
csv_keys = ['reg_num', 'sign_date', 'region', 'customer_name', 'customer_kpp',
            'customer_inn', 'okdp_okpd', 'code_meaning', 'product_description',
            'price', 'supplier_name', 'supplier_kpp', 'supplier_inn', 'link', 'fz']

# 'wb' is the file mode the Python 2 csv module expects.
with open('paper2015.csv', 'wb') as f:
    # Rows also carry 'okei_code' / 'okei_text', which are not output columns.
    # With the default extrasaction='raise' the first writerow() call fails
    # with ValueError, so extra keys are explicitly ignored.
    writer = csv.DictWriter(f, fieldnames=csv_keys, extrasaction='ignore')
    writer.writeheader()
    writer.writerows(values_list)
stop = time.time()
print('Running time %s' % (stop - start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment