Created
January 19, 2016 15:24
-
-
Save ansakoy/eaf18e16f4a1405fdb9d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import requests | |
import time | |
import csv | |
# Classifier codes (OKDP / OKPD) to search for, mapped to their Russian
# descriptions.  Each key is sent verbatim as the `okdp_okpd` query parameter
# and compared for equality with the product classifier codes in the API
# response, so keys must not contain stray whitespace.
okpd_okdp = {
    '2109365': 'Бумага туалетная',
    '21.22.99.000': 'Услуги по производству хозяйственной и туалетной бумаги и бумажных изделий',
    '21.22.11.110': 'Бумага туалетная',
    '21.22.11.111': 'Бумага туалетная с массой 1 кв. м каждого слоя не более 25 г',
    '21.22.11.119': 'Бумага туалетная с массой 1 кв. м каждого слоя более 25 г',
}
def _utf8_field(record, key, default):
    """Return ``record[key]`` UTF-8 encoded; ``default`` replaces a missing or null value."""
    value = record.get(key)
    if value is None:
        value = default
    return value.encode('utf-8')


def _float_field(record, key):
    """Return ``record[key]`` as a float; a missing or null value counts as 0."""
    value = record.get(key)
    return 0.0 if value is None else float(value)


def make_dict(target_contract, target_product, target_code):
    """Flatten one contract/product pair into a dict describing one CSV row.

    Args:
        target_contract: contract record (dict) from the API response; must
            contain ``regNum`` and ``regionCode``.
        target_product: one entry of the contract's ``products`` list.
        target_code: classifier code that matched; must be a key of
            ``okpd_okdp``.

    Returns:
        dict with text fields UTF-8 encoded and ``price`` / ``quantity`` as
        floats.  Missing or null optional fields fall back to the
        placeholders '-', '0' or '?'.

    Raises:
        KeyError: if ``regNum`` or ``regionCode`` is absent, or
            ``target_code`` is not in ``okpd_okdp``.
    """
    reg_num = target_contract[u'regNum'].encode('utf-8')

    # `or {}` / `or []` make an absent or null section behave like an empty
    # one, so the per-field placeholders below apply instead of an exception.
    okei = target_product.get(u'OKEI') or {}
    customer = target_contract.get(u'customer') or {}
    suppliers = target_contract.get(u'suppliers') or []
    if len(suppliers) > 1:
        # Only the first supplier is exported; report the contract so the
        # omission is visible.
        print('multiple suppliers: %s' % target_contract.get(u'regNum', '?'))
    supplier = suppliers[0] if suppliers else {}

    contract_dict = dict()
    contract_dict['code_meaning'] = okpd_okdp[target_code]
    contract_dict['okdp_okpd'] = target_code
    contract_dict['product_description'] = _utf8_field(target_product, u'name', u'-')
    contract_dict['okei_code'] = okei.get(u'code', '-')
    contract_dict['okei_meaning'] = _utf8_field(okei, u'name', u'-')
    contract_dict['reg_num'] = reg_num
    contract_dict['sign_date'] = _utf8_field(target_contract, u'signDate', u'-')
    contract_dict['customer_name'] = _utf8_field(customer, u'fullName', u'-')
    contract_dict['customer_kpp'] = _utf8_field(customer, u'kpp', u'0')
    contract_dict['customer_inn'] = _utf8_field(customer, u'inn', u'0')
    contract_dict['supplier_name'] = _utf8_field(supplier, u'organizationName', u'0')
    contract_dict['supplier_kpp'] = _utf8_field(supplier, u'kpp', u'0')
    contract_dict['supplier_inn'] = _utf8_field(supplier, u'inn', u'0')
    contract_dict['region'] = target_contract[u'regionCode'].encode('utf-8')
    contract_dict['price'] = _float_field(target_product, u'price')
    contract_dict['quantity'] = _float_field(target_product, u'quantity')
    contract_dict['link'] = 'http://clearspending.ru/contract/{}'.format(reg_num)
    contract_dict['fz'] = _utf8_field(target_contract, u'fz', u'?')
    return contract_dict
def _has_code(product, code):
    """Return True when the product's OKDP or OKPD classifier carries ``code``."""
    for key in (u'OKDP', u'OKPD'):
        classifier = product.get(key)
        if classifier and classifier.get(u'code', 0) == code:
            return True
    return False


def extract_values(lst, target_region, code):
    """Append one row per matching product of 2015 contracts to ``lst``.

    Queries the clearspending contract search for the given customer region
    and classifier code, walks every result page (50 contracts per page) and
    converts each product whose OKDP or OKPD code equals ``code`` with
    ``make_dict``.

    Args:
        lst: list that receives the row dicts (mutated in place).
        target_region: customer region code placed in the query string.
        code: classifier code to search for and to match products against.

    Returns:
        ``lst``, on success and on failure alike.  Any error (network, JSON,
        unexpected response shape) is printed and ends processing for this
        region/code pair; rows collected before the error are kept.
    """
    api = 'http://openapi.clearspending.ru/restapi/v3/contracts/search/?daterange=01.01.2015-31.12.2015&customerregion={}&okdp_okpd={}'.format(target_region, code)
    raw = None
    try:
        raw = requests.get(api)
        data = raw.json()
        num_of_contracts = data[u'contracts'][u'total']
        if num_of_contracts == 500:
            # NOTE(review): presumably 500 is the API's result cap, meaning
            # the result set may be truncated - confirm against the API docs.
            print('too many contracts %s' % api)
        # Ceiling division: 50 contracts per page.
        num_of_pages = num_of_contracts // 50 + (num_of_contracts % 50 != 0)
        for page in range(1, num_of_pages + 1):
            page_url = '{}&page={}'.format(api, page)
            data = requests.get(page_url).json()
            for contract in data[u'contracts'][u'data']:
                products = contract.get(u'products')
                if not products:
                    print('No products %s' % contract.get(u'regNum', api))
                    continue
                single_product = len(products) == 1
                for product in products:
                    # Check both classifiers: a product whose OKDP is present
                    # but different must still match through its OKPD.
                    if not _has_code(product, code):
                        continue
                    tmp_d = make_dict(contract, product, code)
                    if single_product:
                        # For a single-product contract the product's `sum`
                        # replaces the unit price.
                        tmp_d['price'] = product.get(u'sum', 0)
                    lst.append(tmp_d)
    except Exception as error:
        # Best-effort scraping: report the failure and move on to the next
        # region/code instead of aborting the whole run.
        status_code = raw.status_code if raw is not None else 'n/a'
        print('error: %s status code: %s api: %s' % (error, status_code, api))
    return lst
# Script body: collect rows for every classifier code in every region code
# 1..99, dump them to paper2015.csv, then report the elapsed wall-clock time.
values_list = list()
start = time.time()

for code in okpd_okdp:
    for region in range(1, 100):
        extract_values(values_list, region, code)

# Column order of the output file.
csv_keys = [
    'reg_num', 'sign_date', 'region',
    'customer_name', 'customer_kpp', 'customer_inn',
    'okdp_okpd', 'code_meaning', 'product_description',
    'price', 'okei_code', 'okei_meaning', 'quantity',
    'supplier_name', 'supplier_kpp', 'supplier_inn',
    'link', 'fz',
]

with open('paper2015.csv', 'wb') as f:
    writer = csv.DictWriter(f, fieldnames=csv_keys)
    writer.writeheader()
    writer.writerows(values_list)

stop = time.time()
print('Running time %s' % (stop - start))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment