Skip to content

Instantly share code, notes, and snippets.

@m-note
Last active August 29, 2015 14:26
Show Gist options
  • Save m-note/6120d2ebb9b246e8e3c0 to your computer and use it in GitHub Desktop.
Save m-note/6120d2ebb9b246e8e3c0 to your computer and use it in GitHub Desktop.
# Collect the results of recordDiet into a pandas DataFrame
import pandas as pd
import os.path
def cleanText(text):
    """Return *text* with newlines and spaces removed.

    Newlines are first turned into spaces, leading/trailing whitespace is
    stripped, and then every remaining ASCII space and full-width
    (ideographic, U+3000) space is deleted.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    # Newlines become spaces so they are dropped together with the other
    # spaces below.
    cleaned = text.replace("\n", " ").strip()
    # Remove half-width (ASCII) spaces inside the text.
    cleaned = cleaned.replace(" ", "")
    # Remove full-width spaces; written as an escape so the character cannot
    # be confused with (or silently normalised to) an ASCII space.
    cleaned = cleaned.replace("\u3000", "")
    return cleaned
def intoDataFrame(extracted):
    """Build a DataFrame with one row per record in *extracted*.

    Each record is converted from a sequence of ``[key, value]`` pairs into a
    dict (both key and value passed through ``cleanText``), and the dicts are
    handed to pandas as rows.

    Args:
        extracted: An iterable of records; each record is an iterable of
            two-item ``[key, value]`` pairs of strings.

    Returns:
        A ``pandas.DataFrame`` indexed 0..n-1 with one column per distinct
        cleaned key. An empty *extracted* yields an empty DataFrame.
    """
    # Collect all rows first and construct the frame once: a list of dicts is
    # interpreted by pandas as one row per dict. This avoids the removed
    # ``DataFrame.append`` and the repeated copying it implied.
    rows = [
        {cleanText(k): cleanText(v) for k, v in record}
        for record in extracted
    ]
    ext_df = pd.DataFrame(rows)
    # Guarantee a plain 0..n-1 index regardless of how the rows were built.
    ext_df.index = range(len(ext_df))
    return ext_df
# ext_df = intoDataFrame(recordDiet(speaker="安倍晋三", any="アベノミクス", Dfrom="2015-03-01", Duntil="2015-05-20"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment