Skip to content

Instantly share code, notes, and snippets.

@AmanouToona
Created December 14, 2020 12:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AmanouToona/290aeb6e5711b1280ed190a018cd33d7 to your computer and use it in GitHub Desktop.
Save AmanouToona/290aeb6e5711b1280ed190a018cd33d7 to your computer and use it in GitHub Desktop.
average
import pandas as pd
import numpy as np
import time
# Build the DataFrame that will later receive NaN values ---------
# One column of 10,000,000 uniform random floats, requested directly in
# its final (rows, 1) shape instead of reshaping a flat array afterwards.
df = pd.DataFrame(
    data=np.random.rand(10_000_000, 1),
)
# Function that replaces randomly chosen elements with NaN
def replace_nan(x, freq):
    """Overwrite a random subset of the elements of *x* with NaN, in place.

    The number of elements to replace is drawn from a normal distribution
    centred on ``len(x) * freq`` (NumPy's default scale of 1.0), truncated
    to an int and clamped to the range ``[0, len(x)]``.

    Args:
        x: pandas Series (e.g. one column passed in by ``DataFrame.apply``).
            Its index may have any labels; elements are picked by position.
        freq: Target fraction of elements to turn into NaN.

    Returns:
        The same object *x*, after modification.
    """
    n_elements = len(x)
    num_replace = int(np.random.normal(n_elements * freq))
    # The random draw can fall below zero or above the number of elements.
    num_replace = min(max(num_replace, 0), n_elements)
    if num_replace == 0:
        return x
    # Pick *positions* (not index labels) because .iloc is positional, and
    # sample without replacement so exactly num_replace elements are hit.
    positions = np.random.choice(n_elements, num_replace, replace=False)
    x.iloc[positions] = np.nan
    return x
# Randomly overwrite entries of df with NaN, one column at a time
freq_replace = 0.3
df = df.apply(lambda column: replace_nan(column, freq_replace))
# Two copies of the NaN-containing frame for the measurements that follow
df1, df2 = df.copy(), df.copy()
print(f"size: {df.shape}\n")
# Main part starts here -------------------------------------
# Approach from the slides (probably): drop the NaN rows, then average.
# time.perf_counter() is used instead of time.time() because it is a
# monotonic, high-resolution clock intended for measuring short durations.
t_start = time.perf_counter()
filtered = df1.dropna()
avg = np.mean(filtered)
t_end = time.perf_counter()
calc_time1 = t_end - t_start
print(f"time: {calc_time1:.3f}, ans: {avg}\n")
# Ordinary approach: call DataFrame.mean() directly (it skips NaN by default).
# time.perf_counter() gives a monotonic, high-resolution reading, which also
# makes a zero elapsed time (and a zero denominator below) far less likely
# than with the coarser time.time().
t_start = time.perf_counter()
avg = df2.mean()
t_end = time.perf_counter()
calc_time2 = t_end - t_start
print(f"time: {calc_time2:.3f}, ans: {avg}")
# Ratio of the dropna-then-mean time to the plain mean() time.
print(f"{(calc_time1 / calc_time2):.3f} times faster\n")
# Is dropna itself what takes the time?
# Object ids are recorded before and after so the output shows whether
# dropna() handed back a different object than the frame it was called on.
id_before_drop = id(df2)
# perf_counter() is the monotonic, high-resolution clock for short timings.
t_start = time.perf_counter()
filtered = df2.dropna()
t_end = time.perf_counter()
id_after_drop = id(filtered)
print(f"time: {t_end - t_start:.3f}, id1: {id_before_drop}, id2: {id_after_drop}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment