-
-
Save AkiyonKS/d42d89c804f96bac2f7634435bb00078 to your computer and use it in GitHub Desktop.
fetch collections of trains
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections
import os

import pandas as pd
# Function definitions start here
def edit_dict(dict):
    """Convert a value -> count mapping into a DataFrame sorted by value.

    Args:
        dict: Mapping from an element to its number of occurrences (for
            example a ``collections.Counter``). The parameter name shadows
            the builtin but is kept so existing callers keep working.

    Returns:
        A DataFrame with a fresh 0..n-1 index and three columns:
        ``count`` (occurrences), ``value`` (the element, sorted ascending)
        and ``value_r`` (an empty-string placeholder for correction info).
    """
    # One row per key; the keys become the 'index' column, the counts column 0.
    df = pd.DataFrame.from_dict(dict, orient='index').reset_index()
    df = df.rename(columns={'index': 'value', 0: 'count'})
    # Sort by value, renumber the rows, and put count before value.
    df = (
        df.sort_values('value')
        .reset_index(drop=True)
        .reindex(columns=['count', 'value'])
    )
    # Placeholder column for correction info; a scalar is broadcast to all rows.
    df['value_r'] = ''
    return df
def fetch_column_values(value, col_name, trains, col_name2):
    """Collect the distinct ``col_name2`` values of rows whose ``col_name`` matches.

    Args:
        value: Value to look for in ``trains[col_name]``.
        col_name: Name of the column to filter on.
        trains: DataFrame to search.
        col_name2: Name of the column whose values are collected.

    Returns:
        The distinct, non-missing ``col_name2`` values joined with ``","``.
        For ``col_name2 == 'photo_id'`` the values are converted to ``int``
        and sorted numerically; otherwise they keep first-appearance order.
        An empty string is returned when nothing matches.
    """
    column = trains[col_name]
    # A cell matches either directly or by its string form. The second test
    # lets stringified keys (e.g. '5' or 'nan') find numeric or missing cells.
    matches = (column == value) | (column.map(str) == str(value))
    # Missing values cannot be joined or converted to int, so drop them.
    values = trains.loc[matches, col_name2].dropna()
    if col_name2 == 'photo_id':
        unique = sorted({int(v) for v in values})
    else:
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        unique = dict.fromkeys(values)
    return ','.join(map(str, unique))
def fetch_collections_trains(df, col_names):
    """Summarise the distinct values of the requested columns.

    Args:
        df: DataFrame of train information (the script loads it from
            trains.csv).
        col_names: Column names to summarise, for example
            ["label", "編成", "路線", "車両", "形式", "愛称", "company_name"].
            Duplicates and names that are not columns of ``df`` are ignored.

    Returns:
        A dict mapping each summarised column name to the DataFrame built
        by ``edit_dict`` from that column's value counts. Each frame gains a
        ``photo_id`` column, and a ``company_name`` column unless the
        summarised column is "company_name" or "keishiki_company".
    """
    # Keep the caller's order (deterministic output) while dropping
    # duplicates and names missing from df.
    col_names2 = [c for c in dict.fromkeys(col_names) if c in df.columns]

    result = {}
    for v in col_names2:
        print(v)
        # Count occurrences of each value after converting it to text.
        counts = collections.Counter(df[v].map(str))
        summary = edit_dict(counts)
        # Attach the railway company names and photo ids seen for each value.
        if v not in ("company_name", "keishiki_company"):
            summary['company_name'] = [
                fetch_column_values(x, v, df, "company_name")
                for x in summary['value']
            ]
        summary['photo_id'] = [
            fetch_column_values(x, v, df, "photo_id")
            for x in summary['value']
        ]
        result[v] = summary
    return result
def save_collections_data(dict_df, save_file_add_name=""):
    """Write each summary DataFrame to ``../csv/collections_<name><suffix>.csv``.

    If the target file already exists it is first moved to
    ``..._backup.csv``, replacing any earlier backup.

    Args:
        dict_df: Mapping from column name to the DataFrame to save.
        save_file_add_name: Optional suffix inserted before ``.csv``.
    """
    # File-name stem for each column; e.g. "形式" is saved as
    # collections_keishiki.csv.
    fnames = {
        "label": "label",
        "keishiki_company": "keishiki_company",
        "company_keishiki": "company_keishiki",
        "company_name": "company_name",
        "形式": "keishiki",
        "愛称": "aishou",
        "編成": "hensei",
        "路線": "rosen",
        "車両": "syaryou",
    }
    for v, frame in dict_df.items():
        # Keys without an entry in the table fall back to the key itself.
        stem = '../csv/collections_' + fnames.get(v, str(v)) + save_file_add_name
        path = stem + '.csv'
        if os.path.isfile(path):
            # os.replace overwrites an existing backup on every platform,
            # whereas os.rename fails on Windows when the target exists.
            os.replace(path, stem + '_backup.csv')
        frame.to_csv(path, index=False)
# End of function definitions
# Script section below
# Load the train-information CSV file
file_path = "../csv/trains.csv"
df = pd.read_csv(file_path)
# Drop the leading column when it is not 'photo_id' (intended for an unnamed
# column picked up while reading). Compare the scalar label rather than a
# one-element Index so the condition is a plain bool.
if df.columns[0] != 'photo_id':
    df = df.drop(columns=df.columns[0])
# Collect the distinct values and their counts for several columns, then
# save each summary as a CSV file
col_names = ["label", "company_name", "形式", "編成", "路線", "車両", "愛称"]
tmp = fetch_collections_trains(df, col_names)
save_collections_data(tmp)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment