Skip to content

Instantly share code, notes, and snippets.

@yudetamago
Created November 21, 2014 21:17
Show Gist options
  • Save yudetamago/f3473696565642b6b2bf to your computer and use it in GitHub Desktop.
Save yudetamago/f3473696565642b6b2bf to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
class DataFormatter:
    """Load tabular data and return it as a cleaned-up DataFrame.

    Cleaning is done in two steps: rows that contain too many missing
    values are dropped, then the gaps that remain in numeric columns are
    filled with the rounded mean of their column.
    """

    # Largest number of missing values a row may have and still be kept.
    __NALimit = 3

    @classmethod
    def fromCSV(cls, fileName) -> pd.DataFrame:
        """Read a CSV and return the cleaned DataFrame.

        Args:
            fileName: Passed straight to ``pandas.read_csv`` as its first
                argument.

        Returns:
            The parsed data with over-sparse rows removed and remaining
            numeric gaps filled.
        """
        df = pd.read_csv(fileName)
        df = cls.__exclude(df, cls.__NALimit)
        return cls.__interpolate(df)

    @classmethod
    def __exclude(cls, df: pd.DataFrame, limit: int) -> pd.DataFrame:
        """Drop rows that have more than ``limit`` missing values.

        The comparison is inclusive: a row with exactly ``limit`` missing
        values is kept.

        Args:
            df: Frame to filter.
            limit: Maximum number of missing values tolerated per row.

        Returns:
            The rows of ``df`` whose missing-value count is at most
            ``limit``.
        """
        # isna() works for every dtype; np.isnan raises TypeError on
        # non-numeric cells such as strings read from a text column.
        missing_per_row = df.isna().sum(axis=1)
        return df[missing_per_row <= limit]

    @classmethod
    def __interpolate(cls, df: pd.DataFrame) -> pd.DataFrame:
        """Fill missing values with the rounded mean of their column.

        Only numeric columns have a mean, so missing values in other
        columns are left untouched.

        Args:
            df: Frame whose gaps should be filled.

        Returns:
            A new frame with numeric gaps replaced by the column mean
            rounded to zero decimals.
        """
        # numeric_only=True keeps mean() from raising on text columns.
        column_means = df.mean(numeric_only=True)
        return df.fillna(np.round(column_means))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment