Skip to content

Instantly share code, notes, and snippets.

@ansakoy
Created December 16, 2015 05:43
Show Gist options
  • Save ansakoy/8679f0dc08818206b404 to your computer and use it in GitHub Desktop.
Save ansakoy/8679f0dc08818206b404 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import requests
import csv
import time
def _utf8(value, default):
    """Return ``value`` encoded as UTF-8, using ``default`` when it is None."""
    if value is None:
        value = default
    return value.encode('utf-8')


def _build_row(contract, product, name):
    """Flatten one matching product of a contract into a CSV row dict."""
    # OKDP takes precedence over OKPD; 0 means neither classifier is present.
    classifier = product.get(u'OKDP')
    if classifier is None:
        classifier = product.get(u'OKPD')
    prod_code = classifier.get(u'code') if classifier is not None else 0

    reg_num = contract.get(u'regNum')  # contract registration number
    # A missing/null customer or supplier list falls back to the placeholder
    # values below instead of raising and aborting the whole crawl.
    customer = contract.get(u'customer') or {}
    suppliers = contract.get(u'suppliers') or []
    if len(suppliers) > 1:
        # Only the first supplier is exported; flag the contract for review.
        print('multiple suppliers: %s' % reg_num)
    supplier = suppliers[0] if suppliers else {}

    return {
        'product': name.encode('utf-8'),
        'okdp_okpd': prod_code,  # product classifier code
        'reg_num': reg_num,
        'sign_date': contract.get(u'signDate', u'-'),  # contract signing date
        'customer_name': _utf8(customer.get(u'fullName'), u'-'),
        'customer_kpp': customer.get(u'kpp', u'0'),  # customer KPP
        'customer_inn': customer.get(u'inn', u'0'),  # customer INN
        'supplier_name': _utf8(supplier.get(u'organizationName'), u'0'),
        'supplier_kpp': supplier.get(u'kpp', u'0'),  # supplier KPP
        'supplier_inn': supplier.get(u'inn', u'0'),  # supplier INN
        'region': contract.get(u'regionCode'),  # customer region code
        'price': float(product.get(u'sum') or 0),  # total price of the product
        'link': 'http://clearspending.ru/contract/{}'.format(reg_num),
    }


def extract_values(target_api, lst):
    """Collect palm-related products from a paginated contract search.

    Requests ``target_api`` to read the total number of contracts, then
    requests every result page (``target_api`` + ``&page=N``) and appends one
    row dict to ``lst`` for each product whose name contains the Russian stem
    for "palm", compared case-insensitively.

    Args:
        target_api: Search URL; ``&page=N`` is appended to it for paging.
        lst: List that receives the row dicts (mutated in place).

    Returns:
        None. Results are delivered through ``lst``; progress is printed.

    Raises:
        Whatever the initial ``requests.get`` raises. Any later error is
        printed together with the last known status code and URL and then
        swallowed, so rows collected up to that point are kept.
    """
    page_size = 50  # the paging arithmetic assumes 50 contracts per page
    raw = requests.get(target_api)
    # Track the request currently being processed so that the error report
    # names the page that actually failed rather than the first request.
    response = raw
    url = target_api
    try:
        data = raw.json()
        num_contracts = data[u'contracts'][u'total']  # total number of contracts
        print(num_contracts)
        num_pages = (num_contracts + page_size - 1) // page_size  # ceiling division
        print(num_pages)
        for page in range(1, num_pages + 1):
            url = '{}&page={}'.format(target_api, page)
            print(url)
            response = None  # no status code exists if the request itself fails
            response = requests.get(url)
            data = response.json()
            for contract in data[u'contracts'][u'data']:
                for product in contract.get(u'products') or []:
                    name = product.get(u'name')
                    # Skip unnamed products; a single lower-cased stem check
                    # covers every capitalisation of the phrase.
                    if not name or u'пальм' not in name.lower():
                        continue
                    lst.append(_build_row(contract, product, name))
    except Exception as er:
        # Best-effort boundary: report the failure and keep what was collected.
        status = response.status_code if response is not None else None
        print('%s status code: %s %s' % (er, status, url))
# Column order of the output CSV; these are the keys of the row dicts that
# extract_values appends to the result list.
csv_keys = [
    'reg_num',
    'sign_date',
    'region',
    'customer_name',
    'customer_kpp',
    'customer_inn',
    'supplier_name',
    'supplier_kpp',
    'supplier_inn',
    'okdp_okpd',
    'product',
    'price',
    'link',
]
api = 'http://openapi.clearspending.ru/restapi/v3/contracts/search/?productsearch=пальмовое+масло&daterange=01.01.2013-31.12.2015'
data_list = []
start = time.time()
try:
    extract_values(api, data_list)
except Exception as error:
    # Report the failure but still write whatever rows were collected.
    print('%s url: %s' % (error, api))

# Write the collected rows to a CSV file (binary mode, as the Python 2 csv
# module expects).
with open('palm_text.csv', 'wb') as out_file:
    writer = csv.DictWriter(out_file, fieldnames=csv_keys)
    writer.writeheader()
    writer.writerows(data_list)
stop = time.time()
print('Running time: %s' % (stop - start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment