Skip to content

Instantly share code, notes, and snippets.

@GINK03
Last active April 3, 2017 13:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save GINK03/3d2b299244c6888e2d94e9b6963b5a41 to your computer and use it in GitHub Desktop.
Save GINK03/3d2b299244c6888e2d94e9b6963b5a41 to your computer and use it in GitHub Desktop.
import os
import math
import sys
import itertools
import subprocess
import glob
import csv
from collections import OrderedDict as dict
import pickle
import re
# Column names that sum_monoid() carries through to the aggregated row
# (it tests ``key in headers``).  Built by splitting one comma-separated
# string into a set; membership is the only thing this constant is used for
# in this file.
headers = set("計上月,担当者CD,担当者,請求先CD,請求先科目CD,請求先名,請求先備考1,請求先備考2,請求先予備1,請求先予備2,請求先予備3,請求書送付先CD,請求書送付先担当者,請求書送付先住所,サービス概要,商品CD,商品名,項目,売上期間,注記,売上詳細備考,税抜売上,税込売上,粗利,支払予定日,税抜原価,税込原価,支払先CD,支払先名,支払先備考1,支払先備考2,支払先予備1,支払先予備2,支払先予備3,申込備考1,アカウントNo,アカウント備考,クライアントCD,クライアント名,クライアント備考1,クライアント備考2,クライアント予備1,クライアント予備2,クライアント予備3,契約備考1,契約備考2,その他備考1,その他備考2,請求書出力単位,プロジェクトCD,プロジェクト名,プロジェクト備考1,プロジェクト備考2,申込予算,マージン,管理費割合,値引方法CD,値引方法名,値引パターンCD,値引パターン名,コミッション割合,定額値引額,定額値引率,値引備考,更新ステータスCD,更新ステータス名,契約書回収日,契約No,契約NoSEQ,契約売上明細行No,契約開始日,契約終了日,売上明細開始日,売上明細終了日,事業部CD,事業部名,部門CD,部門名,大分類CD,大分類名,中分類CD,中分類名,小分類CD,小分類名,商品備考,商品予備2,商品予備3,商品予備4,商品予備5,商品予備6CD,商品予備6名称,商品予備7CD,商品予備7名称,商品予備8CD,商品予備8名称,商品予備9CD,商品予備9名称,商品予備10CD,商品予備10名称,商品予備11名称,商品予備12名称,商品予備13名称,商品予備14名称,商品予備15名称".split(","))
def sum_monoid(header, monoids, passthrough=None):
    """Collapse several CSV rows into one summary row.

    For each column name in ``header``:

    * the five amount columns (税抜売上, 税込売上, 粗利, 税抜原価, 税込原価)
      are summed as floats, an empty cell counting as zero;
    * 商品名, 計上月, クライアントCD and every column listed in
      ``passthrough`` take the first non-empty cell found in ``monoids``;
    * any other column is left out of the result.

    When no row has a value, 商品名 falls back to ``""`` and every other
    carried column (including 計上月 / クライアントCD, which previously raised
    ``IndexError`` here) falls back to the placeholder ``"-"``.  A column
    that is in ``passthrough`` always ends up with the pass-through result.

    Args:
        header: iterable of column names, in the order the result should have.
        monoids: list of row mappings (column name -> cell string).
        passthrough: collection of column names to carry through.  Defaults
            to the module-level ``headers`` set.

    Returns:
        A mapping created with the file-level ``dict`` name (which this file
        aliases to ``collections.OrderedDict``).

    Raises:
        KeyError: if a row lacks a column that has to be read.
        ValueError: if a non-empty amount cell is not a valid float.
    """
    if passthrough is None:
        passthrough = headers
    amount_columns = ("税抜売上", "税込売上", "粗利", "税抜原価", "税込原価")
    required_columns = ("計上月", "クライアントCD")

    monad = dict()
    for key in header:
        if key in amount_columns:
            monad[key] = sum(
                0. if row[key] == '' else float(row[key]) for row in monoids
            )
            continue

        is_product_name = key == "商品名"
        is_required = key in required_columns
        is_passthrough = key in passthrough
        if not (is_product_name or is_required or is_passthrough):
            # Column is not carried into the summary; do not even read it.
            continue

        values = [row[key] for row in monoids if row[key] != ""]

        if is_product_name:
            # NOTE(review): as it appears here this replace() swaps a space
            # for a space (a no-op); it presumably normalised a different
            # whitespace character originally -- confirm against the data.
            monad[key] = values[0].replace(' ', ' ') if values else ""
        elif is_required:
            monad[key] = values[0] if values else "-"

        # TODO (from the original author): add the split condition for the
        # "請求積" column here.

        if is_passthrough:
            # Deliberately last: overrides the special cases above, exactly
            # as the original ordering of the checks did.
            monad[key] = values[0] if values else "-"
    return monad
def analystic():
    """Aggregate every ``./*.csv.utf8`` file per contract and pickle the result.

    Steps:

    1. Read ``./sellerid2lastoutput.txt``; on each line the first
       space-separated token is a raw product name and the last
       whitespace-separated token is the name it is mapped to.
    2. For each CSV, keep only rows whose 大分類名 is "広告代理", that have a
       契約No column, and whose 商品名 is present in the mapping; replace
       商品名 with the mapped name.
    3. Group the surviving rows by ``"<契約No> <クライアント名> 直販"`` and
       reduce each group with ``sum_monoid``.
    4. Write the ``{group id: summary row}`` dict to ``./conv/<name>.pkl``.

    Blank mapping lines, empty CSV files and blank/short CSV rows are skipped
    instead of raising.  ``./conv`` is created when missing.
    """
    seller_type2lastoutput = {}
    with open('./sellerid2lastoutput.txt', 'r') as f:
        for line in f:
            tokens = line.split()
            if not tokens:
                continue  # blank line: nothing to map
            # Split on a literal ' ' here (not on arbitrary whitespace), as
            # the original did, so the key keeps any other whitespace.
            seller_type = line.split(' ')[0]
            print("sellet type", seller_type)
            seller_type2lastoutput[seller_type] = tokens[-1]

    os.makedirs('./conv', exist_ok=True)

    for name in glob.glob('./*.csv.utf8'):
        # The files are produced as UTF-8 by conv(); newline='' is what the
        # csv module asks for so quoted line breaks survive untouched.
        with open(name, encoding='utf-8', newline='') as f:
            it = csv.reader(f, delimiter=',', quotechar='"')
            header = next(it, None)
            if header is None:
                print("empty csv, skipped", name, file=sys.stderr)
                continue

            cid_monoids = dict()
            for ps in it:
                monoid = dict(zip(header, ps))
                # .get(): blank or truncated rows have no such column and are
                # simply not "広告代理" rows.
                if monoid.get('大分類名') != "広告代理":
                    continue
                if monoid.get('契約No') is None:
                    continue
                try:
                    # NOTE(review): as it appears here this replace() swaps a
                    # space for a space (a no-op); confirm which whitespace
                    # character was meant.
                    monoid['商品名'] = seller_type2lastoutput[monoid['商品名'].replace(' ', ' ')]
                except KeyError as e:
                    print("cannot find e", e)
                    continue
                # Ad-hoc sanitisation: the group id below is space-separated,
                # so the client name must not contain spaces itself.
                monoid['クライアント名'] = monoid['クライアント名'].replace(' ', '')
                cid = monoid['契約No'] + " " + monoid['クライアント名'] + " 直販"
                cid_monoids.setdefault(cid, []).append(monoid)

        cid_monad = {}
        for cid, monoids in cid_monoids.items():
            cid_monad[cid] = sum_monoid(header, monoids)

        with open('./conv/%s.pkl'%name, 'wb') as out:
            out.write(pickle.dumps(cid_monad))
# ---------------------------------------------------------------------------
# Module-level lookup tables.  Everything below runs at import time and reads
# / writes files in the current working directory.
# ---------------------------------------------------------------------------

# sellertype_index: mapped product name -> position in a one-hot vector.
# Cached in sellertype_index.pkl; rebuilt from sellerid2lastoutput.txt (last
# whitespace-separated token of each line, numbered in order of first
# appearance) when the cache file does not exist.
try:
    # NOTE: pickle must only be used on trusted files; this cache is written
    # by this very script a few lines below.
    with open('sellertype_index.pkl', 'rb') as f:
        sellertype_index = pickle.loads(f.read())
except FileNotFoundError as e:
    from collections import OrderedDict
    sellertype_index = OrderedDict()
    with open('./sellerid2lastoutput.txt', 'r') as f:
        for line in f:
            tokens = line.split()
            if not tokens:
                continue  # blank line
            sellertype_index.setdefault(tokens[-1], len(sellertype_index))
    with open('sellertype_index.pkl', 'wb') as f:
        f.write(pickle.dumps(sellertype_index))

# client_name_flag: first token of each kasu.txt line -> (last token as int) - 1.
# Only referenced from commented-out code in tinger() at the moment.
client_name_flag = {}
with open('./kasu.txt', 'r') as f:
    for excel in f:
        ents = excel.split()
        if not ents:
            continue  # blank line
        client_name_flag[ents[0]] = int(ents[-1]) - 1

# Company (client) code -> commercial-distribution (商流) code.  Each line of
# cdist_clientcd.txt starts with the cdist code and ends with the client code.
clientcd_cdist = {}
with open('./cdist_clientcd.txt') as f:
    for line in f:
        ents = line.split()
        if not ents:
            continue  # blank line
        clientcd_cdist[ents[-1]] = ents[0]

# cdist_index: cdist code -> position in a one-hot vector, numbered in the
# order the codes show up while walking clientcd_cdist.
cdist_index = {}
for cdist in clientcd_cdist.values():
    cdist_index.setdefault(cdist, len(cdist_index))
with open('cdist_index.pkl', 'wb') as f:
    f.write(pickle.dumps(cdist_index))
def _client_name_vectorizer(client_name):
    """Return a '*'-joined one-hot vector for the cdist code of ``client_name``.

    ``client_name`` is used as a key of the module-level ``clientcd_cdist``
    table; the resulting cdist code selects the hot slot via ``cdist_index``.
    The vector has one float entry per ``cdist_index`` entry.

    Raises:
        KeyError: if either lookup fails.
    """
    one_hot = [0.] * len(cdist_index)
    cdist = clientcd_cdist[client_name]
    slot = cdist_index[cdist]
    one_hot[slot] = 1.
    return '*'.join(str(value) for value in one_hot)
def _seller_type_vectorizer(type_name):
    """Return a '*'-joined one-hot vector for ``type_name``.

    The hot slot is ``sellertype_index[type_name]`` and the vector has one
    float entry per ``sellertype_index`` entry.

    Raises:
        KeyError: if ``type_name`` is not in ``sellertype_index``.
    """
    one_hot = [0.] * len(sellertype_index)
    slot = sellertype_index[type_name]
    one_hot[slot] = 1.
    return '*'.join(str(value) for value in one_hot)
def tinger():
    """Total the pickled summaries per month and client and print one line each.

    Every ``./conv/*.pkl`` file is expected to hold a dict of summary rows as
    written by ``analystic``.  Rows are grouped by
    ``"<計上月>__SEP__<クライアントCD>"``; the five amount columns are added up
    and the remaining columns are taken from the last row seen for the group
    (which row is "last" depends on the order glob returns the files).

    Each output line is ``__SEP__``-joined: the group key, the remaining
    columns as ``name=value``, then the five totals formatted with ``%d``
    (fraction truncated).  All whitespace is removed from every part.

    The group id (the dict key inside the pickle) is no longer parsed: the
    pieces taken from it were never used, and indexing them raised
    ``IndexError`` for ids with fewer than two tokens.
    """
    class Amounts(object):
        """Running totals for one group plus the last row seen for it."""

        def __init__(self):
            self.uriage_nozei = 0.   # 税抜売上
            self.uriage_zei = 0.     # 税込売上
            self.arari = 0.          # 粗利
            self.tax_genka = 0.      # 税込原価
            self.notax_genka = 0.    # 税抜原価
            self.monad = {}

    amount_columns = ("税抜売上", "税込売上", "粗利", "税抜原価", "税込原価")

    key_amount = {}
    for name in glob.glob('./conv/*.pkl'):
        # NOTE: pickle must only be used on trusted files; these are the ones
        # this script wrote itself.
        with open(name, 'rb') as f:
            cid_monad = pickle.loads(f.read())
        for monad in cid_monad.values():
            key = monad["計上月"] + "__SEP__" + monad["クライアントCD"]
            amount = key_amount.get(key)
            if amount is None:
                amount = key_amount[key] = Amounts()
            amount.uriage_nozei += monad["税抜売上"]
            amount.uriage_zei += monad["税込売上"]
            amount.arari += monad["粗利"]
            amount.tax_genka += monad["税込原価"]
            amount.notax_genka += monad["税抜原価"]
            amount.monad = monad

    for key, amount in key_amount.items():
        # Per-row amounts are replaced by the group totals appended below.
        data = "__SEP__".join(
            "%s=%s"%(k, v)
            for k, v in amount.monad.items()
            if k not in amount_columns
        )
        parts = [
            key,
            data,
            "税抜売上=%d"%amount.uriage_nozei,
            "税込売上=%d"%amount.uriage_zei,
            "粗利=%d"%amount.arari,
            "税抜原価=%d"%amount.notax_genka,
            "税込原価=%d"%amount.tax_genka,
        ]
        print("__SEP__".join(re.sub(r"\s| ", "", str(part)) for part in parts))
def conv():
    """Re-encode every ``./obic/*.csv`` into ``./<basename>.utf8`` with ``nkf``.

    Runs ``nkf -S -w`` for each file, feeding the CSV on stdin and writing
    stdout to the target file (the same data flow as the former
    ``cat file | nkf -S -w > target`` shell pipeline), then prints nkf's exit
    code.  (Per the nkf manual, ``-S`` declares Shift_JIS input and ``-w``
    requests UTF-8 output.)

    The command is passed as an argument list without a shell, so file names
    containing spaces or shell metacharacters are handled correctly.

    Note: the printed value is now the plain exit code; ``os.system`` printed
    the raw wait status, which differs for non-zero exits.

    Raises:
        FileNotFoundError: if the ``nkf`` executable is not installed.
    """
    for name in glob.glob('./obic/*.csv'):
        last_name = os.path.basename(name)
        with open(name, 'rb') as src, open("%s.utf8"%last_name, 'wb') as dst:
            ret = subprocess.run(['nkf', '-S', '-w'], stdin=src, stdout=dst).returncode
        print(ret)
if __name__ == '__main__':
    # Command-line switches are independent of each other; every switch that
    # is present runs its step, always in this fixed order.
    for switch, step in (('--conv', conv), ('-a', analystic), ('-t', tinger)):
        if switch in sys.argv:
            step()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment