Created
October 11, 2017 14:56
-
-
Save ansakoy/e01ec8b923a3a0e147ff76bcbda3da3d to your computer and use it in GitHub Desktop.
Скрипт для выгрузки данных по импорту молочной продукции
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
'''
DATA SOURCE: https://hubofdata.ru/dataset/customs-stats
The files from the archive were unpacked into the 'TCBT' directory.
'''
import csv
import io
import os
# FIELDS
# Field names: used as the keys of the record dictionaries built by
# read_source() and as the column headers written by extract_data().
DIRECTION = 'direction'
PERIOD = 'period'
COUNTRY = 'country'
TNVED = 'tnved'
UNIT = 'unit'
PRICE = 'price'
NETTO = 'netto'
QUANTITY = 'quantity'
REGION = 'region'
DISTRICT = 'district'
def read_source(f_name):
    '''
    Extract the matching rows from one CSV file as a list of dictionaries.

    The file is decoded from cp866 once, at the I/O boundary, so every
    column of the returned records ends up in the same encoding (native
    ``str``: UTF-8 bytes on Python 2, text on Python 3).  The first line
    is treated as a header and skipped.  Rows with fewer than 10 columns
    (for example blank lines) are ignored instead of raising IndexError.

    Args:
        f_name: path to a cp866-encoded file containing a CSV table.

    Returns:
        A list of dictionaries keyed by the field-name constants, one for
        each row whose first column is 'ИМ' and whose fourth column is '04'.
    '''
    # Record keys in the same order as the first ten columns of the file.
    fields = (DIRECTION, PERIOD, COUNTRY, TNVED, UNIT,
              PRICE, NETTO, QUANTITY, REGION, DISTRICT)
    results = []
    # newline='' leaves line-ending handling to the csv module.
    with io.open(f_name, encoding='cp866', newline='') as handler:
        if str is bytes:
            # Python 2: the csv module only works on byte strings,
            # so hand it UTF-8 encoded lines.
            lines = (line.encode('utf-8') for line in handler)
        else:
            lines = handler
        reader = csv.reader(lines)
        next(reader, None)  # skip the header row (no-op on an empty file)
        for row in reader:
            if len(row) < len(fields):
                continue
            if row[3] == '04' and row[0] == 'ИМ':
                # zip() stops after the last field, so extra columns are dropped.
                results.append(dict(zip(fields, row)))
    return results
def dir_walker(folder):
    '''
    Run read_source on every file in a folder and merge the results.

    Entries are processed in sorted name order so the order of the
    returned records is reproducible (os.listdir makes no ordering
    promise).  Entries that are not regular files are skipped.

    Args:
        folder: path to the directory containing the source CSV files.

    Returns:
        A single list with the dictionaries returned by read_source for
        each file, concatenated in processing order.
    '''
    results = []
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        if not os.path.isfile(path):
            # Subdirectories cannot be opened as CSV files.
            continue
        # Single-argument form prints the same text on Python 2 and 3.
        print('processing {}'.format(filename))
        results.extend(read_source(path))
    return results
def extract_data():
    '''
    Write the rows extracted from the 'TCBT' directory to a CSV file.

    The rows are collected before the output file is opened, so a failure
    while reading the sources does not truncate a previously written
    'cust_milk_products.csv'.
    '''
    csv_headers = [DIRECTION, PERIOD, COUNTRY,
                   TNVED, UNIT, PRICE, NETTO,
                   QUANTITY, REGION, DISTRICT]
    results = dir_walker('TCBT')
    print('Writing...')
    if str is bytes:
        # Python 2: the csv module writes byte strings to a binary file.
        handler = open('cust_milk_products.csv', 'wb')
    else:
        # Python 3: the csv module writes text; newline='' avoids
        # doubled line endings on Windows.
        handler = open('cust_milk_products.csv', 'w',
                       newline='', encoding='utf-8')
    with handler:
        writer = csv.DictWriter(handler, fieldnames=csv_headers)
        writer.writeheader()
        writer.writerows(results)
    print('Done!')
# Run the extraction only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    extract_data()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment