Skip to content

Instantly share code, notes, and snippets.

@Drunkar
Created January 13, 2016 07:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Drunkar/3b6f88dd792e6a4adc8c to your computer and use it in GitHub Desktop.
Save Drunkar/3b6f88dd792e6a4adc8c to your computer and use it in GitHub Desktop.
csvからカラム名を外部ファイルで指定して取り出す
# coding: utf-8
import sys
import numpy as np
# Module-level default: an empty list of column names.  main() in this file
# neither reads nor modifies this list; it works on its own local value.
VALID_COLUMNS = []
def main():
    """Copy selected columns of a CSV file into a new CSV file.

    Command-line arguments (read from ``sys.argv``):
        1. path of the input CSV, whose first line is the header;
        2. path of the output CSV to create or overwrite;
        3. path of a file whose first line is a comma-separated list of the
           column names to keep.

    The selected columns are written in the order in which they appear in
    the input header, and the header line of the output is built from those
    same columns, so it always lines up with the data rows.  Names in the
    column file that do not occur in the input header are ignored.  An empty
    column file behaves like one whose first line is blank.

    Lines are split on plain commas; quoted fields that contain commas are
    not supported.

    Raises:
        SystemExit: if fewer than three arguments are given.
        IndexError: if a data row has fewer fields than the position of the
            last selected column.
    """
    if len(sys.argv) < 4:
        sys.exit("usage: INPUT_CSV OUTPUT_CSV VALID_COLUMNS_CSV")

    inputfile = sys.argv[1]
    outputfile = sys.argv[2]
    valid_columns_csv = sys.argv[3]

    # Load the wanted column names; only the first line of the file is used.
    with open(valid_columns_csv) as fi:
        first_line = next(fi, "")
    # A set gives constant-time membership tests while scanning the header.
    valid_columns = set(first_line.rstrip().split(","))

    # Stream the input line by line, writing the filtered row immediately.
    with open(inputfile) as fi, open(outputfile, "w") as fo:
        keep = []
        for i, line in enumerate(fi):
            row = line.rstrip().split(",")
            if i == 0:
                # Remember the positions of the header labels to keep.
                keep = [j for j, label in enumerate(row)
                        if label in valid_columns]
            # The header goes through the same selection as the data rows,
            # which keeps the column names aligned with their values.
            fo.write(",".join([row[j] for j in keep]) + "\n")
# Run the column extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment