Skip to content

Instantly share code, notes, and snippets.

@nutszebra
Created November 21, 2015 15:41
Show Gist options
  • Save nutszebra/6a53618b09447f7a89d3 to your computer and use it in GitHub Desktop.
Save nutszebra/6a53618b09447f7a89d3 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#Link: http://www.cl.ecei.tohoku.ac.jp/nlp100/
"""
hightemp.txt: http://www.cl.ecei.tohoku.ac.jp/nlp100/data/hightemp.txt
は,日本の最高気温の記録を「都道府県」「地点」「℃」「日」のタブ区切り形式で格納したファイルである.
以下の処理を行うプログラムを作成し,hightemp.txtを入力ファイルとして実行せよ.
Question 13:
13. col1.txtとcol2.txtをマージ
12で作ったcol1.txtとcol2.txtを結合し,元のファイルの1列目と2列目をタブ区切りで並べたテキストファイルを作成せよ.
確認にはpasteコマンドを用いよ.
"""
import subprocess
import requests
import os
def download(url, dir, params=None):
    """Fetch ``url`` over HTTP and store the response body in a file.

    Args:
        url: Address requested with ``requests.get``.
        dir: Path of the file the downloaded bytes are written to (despite
            the name, this is a file path, not a directory).
        params: Optional mapping of query-string parameters. ``None`` (the
            default) sends no extra parameters.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            an error page is never saved in place of the requested data.
    """
    # ``None`` instead of ``{}`` as the default avoids sharing one mutable
    # dict between calls; requests treats ``None`` as "no parameters".
    dl = requests.get(url, params=params)
    dl.raise_for_status()
    # open() exists on Python 2 and 3; the file() builtin is Python 2 only.
    with open(dir, "wb") as f:
        f.write(dl.content)
def readFile(path):
    """Return the entire contents of the text file at ``path`` as one string.

    Args:
        path: File to read.

    Returns:
        The file's text, newlines included; ``""`` for an empty file.
    """
    # A single read() replaces the line-by-line ``answer = answer + line``
    # loop, which re-copied the accumulated text on every iteration.
    # open() is used because the file() builtin exists only on Python 2.
    with open(path, "r") as f:
        return f.read()
def remove(path):
    """Delete the file at ``path``; do nothing if it does not exist.

    Args:
        path: File to delete.

    Raises:
        OSError: For any failure other than the path not existing.
    """
    import errno  # local import: only this function needs it

    # Try the deletion directly instead of checking os.path.exists() first;
    # the check-then-delete form can race with another process removing the
    # file in between.
    try:
        os.remove(path)
    except OSError as exc:
        # Comparing errno (rather than catching FileNotFoundError) keeps the
        # code valid on Python 2 as well as Python 3.
        if exc.errno != errno.ENOENT:
            raise
def convertPunctuationSymbol(punctuation):
    """Translate a separator name (or the separator itself) into its symbol.

    Args:
        punctuation: One of ``"space"``, ``"tab"``, ``"comma"``,
            ``"period"``, ``"double space"``, or the corresponding literal
            symbol.

    Returns:
        The separator string. Any unrecognized value falls back to a single
        space.
    """
    # Built with ``* 2`` so the two-space literal cannot be silently
    # collapsed to one space; previously the "double space" entry returned a
    # single space and its symbol comparison could never be reached.
    double_space = " " * 2
    known = (
        ("space", " "),
        ("tab", "\t"),
        ("comma", ","),
        ("period", "."),
        ("double space", double_space),
    )
    for name, symbol in known:
        if punctuation == name or punctuation == symbol:
            return symbol
    return " "
def saveFile(array, path, punctuation="space"):
    """Write ``array`` to ``path``, one element per line.

    Args:
        array: Iterable of rows. A row that is a string is written as-is
            (single column); any other row is treated as a sequence of
            strings whose fields are joined with the separator.
        path: Destination file; overwritten if it already exists.
        punctuation: Separator name or symbol, resolved through
            ``convertPunctuationSymbol``.

    Returns:
        None. When ``array`` is empty nothing is written and ``path`` is
        left untouched.
    """
    # Materialize first so one-shot iterables (e.g. Python 3 ``zip``) are
    # accepted in addition to lists.
    rows = list(array)
    if not rows:
        return None
    separator = convertPunctuationSymbol(punctuation)
    # Each row is formatted on its own, so rows of differing width no longer
    # raise IndexError or lose trailing fields.
    lines = [
        row if isinstance(row, str) else separator.join(row)
        for row in rows
    ]
    # Text mode takes native strings on both Python 2 and 3; the previous
    # file(path, "wb") call relied on a Python-2-only builtin and on writing
    # str to a binary handle.
    with open(path, "w") as f:
        f.write("\n".join(lines) + "\n")
def encloseByQuotation(sentence):
    """Surround ``sentence`` with quotation marks.

    Double quotes are used when ``sentence`` contains a single quote;
    otherwise single quotes are used.
    """
    mark = '"' if "'" in sentence else "'"
    return mark + sentence + mark
def paste(array, path, punctuation="space"):
    """Merge files column-wise with the ``paste`` command and save the output.

    Args:
        array: Sequence of input file paths, handed to ``paste`` in order.
        path: File that receives the command's standard output; overwritten
            if it already exists.
        punctuation: Delimiter name or symbol, resolved through
            ``convertPunctuationSymbol`` and passed to ``paste -d``.

    Raises:
        OSError: If the ``paste`` executable cannot be started.
        subprocess.CalledProcessError: If ``paste`` exits with a non-zero
            status (previously the exit status was ignored).
    """
    delimiter = convertPunctuationSymbol(punctuation)
    # An argument list run without a shell delivers the delimiter and the
    # file names to paste verbatim, so no quoting is needed and names with
    # spaces or shell metacharacters cannot alter the command.
    cmd = ["paste", "-d" + delimiter] + list(array)
    # open() replaces the Python-2-only file() builtin.
    with open(path, "wb") as f:
        subprocess.check_call(cmd, stdout=f)
# Download hightemp.txt unless a local copy is already present.
if not os.path.exists("./hightemp.txt"):
    download("http://www.cl.ecei.tohoku.ac.jp/nlp100/data/hightemp.txt", "./hightemp.txt")
content = readFile("./hightemp.txt")

# Split each non-empty line into its tab-separated fields once, then pick
# the first and second field of every row.
rows = [line.split("\t") for line in content.split("\n") if line]
col1 = [fields[0] for fields in rows]
col2 = [fields[1] for fields in rows if len(fields) > 1]

try:
    saveFile(col1, "./col1.txt")
    saveFile(col2, "./col2.txt")
    col1Saved = readFile("./col1.txt")
    col2Saved = readFile("./col2.txt")

    # Merge in Python. list() is required because saveFile needs a sequence
    # and zip() returns a one-shot iterator on Python 3.
    saveFile(list(zip(col1, col2)), "./col12.txt", punctuation = "tab")
    col12 = readFile("./col12.txt")

    # Merge again with the paste command to cross-check the result.
    paste(["./col1.txt", "./col2.txt"], "./pasteCol12.txt", punctuation = "tab")
    col12Paste = readFile("./pasteCol12.txt")

    print("original file: \n" + content)
    print("col1: \n" + "\n".join(col1) + "\n")
    print("col2: \n" + "\n".join(col2) + "\n")
    print("saved col1: \n" + col1Saved)
    print("saved col2: \n" + col2Saved)
    print("col1 was merged with col2: \n" + col12)
    print("col1 was merged with col2 by paste command: \n" + col12Paste)
finally:
    # Clean up the intermediate files even when a step above fails.
    remove("./col1.txt")
    remove("./col2.txt")
    remove("./col12.txt")
    remove("./pasteCol12.txt")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment