Created
January 13, 2016 07:38
-
-
Save Drunkar/3b6f88dd792e6a4adc8c to your computer and use it in GitHub Desktop.
csvからカラム名を外部ファイルで指定して取り出す
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import sys | |
import numpy as np | |
# Kept so that code importing this name keeps working. main() builds its own
# local list and never reads or mutates this one.
VALID_COLUMNS = []


def main():
    """Copy the selected columns of a CSV file into a new CSV file.

    Arguments are read from ``sys.argv``:

    1. path of the input CSV file (its first line is the header),
    2. path of the output CSV file (overwritten),
    3. path of a CSV file whose first line lists the column names to keep.

    Columns are written in the order they appear in the input file, and the
    output header contains only the names that were actually found there, so
    the header always lines up with the data rows. Names listed in the
    column file but missing from the input are ignored. An empty column
    file selects no columns.

    Fields are split on every comma; quoted fields containing commas are
    not supported.

    Raises:
        IndexError: if fewer than three arguments are given, or a data row
            is too short to contain one of the selected columns.
    """
    inputfile = sys.argv[1]
    outputfile = sys.argv[2]
    valid_columns_csv = sys.argv[3]

    # Load the names of the columns to keep; only the first line is used.
    # Starting from an empty list keeps an empty file from leaving the
    # name undefined.
    valid_columns = []
    with open(valid_columns_csv) as fi:
        for line in fi:
            valid_columns = line.rstrip().split(",")
            break
    wanted = set(valid_columns)

    # Stream the input so the whole file never has to fit in memory.
    kept_indices = []
    with open(inputfile) as fi, open(outputfile, "w") as fo:
        for i, line in enumerate(fi):
            row = line.rstrip().split(",")
            if i == 0:
                # Positions of the header labels that were requested.
                kept_indices = [
                    j for j, label in enumerate(row) if label in wanted
                ]
            # The header goes through the same selection as the data rows,
            # which guarantees both have identical column order and count.
            fo.write(",".join([row[j] for j in kept_indices]) + "\n")
# Run the column extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment