Skip to content

Instantly share code, notes, and snippets.

@AkiyonKS
Created August 9, 2022 02:15
Show Gist options
  • Save AkiyonKS/74e77c005bc81ed8bd844e8bcaaf8bed to your computer and use it in GitHub Desktop.
Save AkiyonKS/74e77c005bc81ed8bd844e8bcaaf8bed to your computer and use it in GitHub Desktop.
Add the corrected railway company name and the country to each row of trains.csv.
import pandas as pd
# Enrich trains.csv with a corrected railway-company name and a country.
#
# Inputs : ../csv/railway_companies.csv (correction table), ../csv/trains.csv
# Output : ../csv/trains.csv is overwritten in place with the merged result.

# Load the correction table. The first column is dropped by position; it is
# presumably the unnamed index column written by an earlier DataFrame.to_csv.
df = pd.read_csv("../csv/railway_companies.csv")
df = df.drop(columns=df.columns[[0]])

# Pick the corrected name (value_r) when one is present as a string; otherwise
# (e.g. NaN for an empty cell) fall back to the original name (value).
df["tmp"] = [
    fixed if isinstance(fixed, str) else original
    for fixed, original in zip(df["value_r"], df["value"])
]
df = df.drop(columns=["value_r"])

# 'company_name' is the corrected name; 'company_name0' keeps the original
# value and serves as the join key against trains.csv.
df = df.rename(columns={"tmp": "company_name", "value": "company_name0"})

# Load trains.csv and drop its leading unnamed column (the index column that
# the to_csv call at the end of this script writes back out).
file_path = "../csv/trains.csv"
trains = pd.read_csv(file_path)
trains = trains.drop(columns=trains.columns[[0]])

# Remove any country / company_name columns left by a previous run so the merge
# below does not produce suffixed duplicates. errors="ignore" lets the script
# also run on a trains.csv that does not have these columns yet.
trains = trains.drop(columns=["country", "company_name"], errors="ignore")

# Keep photo_id as text rather than a number.
trains["photo_id"] = trains["photo_id"].map(str)

# Inner join on the original company name: rows whose company_name0 has no
# entry in the correction table are dropped. The 'country' column is expected
# to come from the correction table -- TODO confirm against the CSV schema.
df2 = pd.merge(trains, df, on="company_name0", how="inner")

# Sort, then discard the old row labels in favour of a fresh 0..n-1 index.
df2 = df2.sort_values(
    ["country", "company_name", "company_name0"]
).reset_index(drop=True)

# Fix the output column order. reindex also drops any column not listed here
# and creates an all-NaN column for any listed name that is missing.
df2 = df2.reindex(columns=[
    "photo_id",
    "country",
    "company_name",
    "形式",
    "愛称",
    "編成",
    "路線",
    "車両",
    "alt",
    "アクセス数",
    "投稿日時",
    "撮影場所",
    "撮影日時",
    "src",
    "company_name0",
])

# Overwrite trains.csv with the result (the index is written as the first,
# unnamed column, matching what the loader above strips off).
df2.to_csv(file_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment