Skip to content

Instantly share code, notes, and snippets.

@naturale0
Created July 19, 2019 10:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save naturale0/4be1bad9bbfa40e0ceaa26c0f798de0d to your computer and use it in GitHub Desktop.
Save naturale0/4be1bad9bbfa40e0ceaa26c0f798de0d to your computer and use it in GitHub Desktop.
# In []:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
from scipy import stats
# Three samples of 100 draws each from normal distributions that differ in
# location and spread, so the raw columns are visibly misaligned.
A = np.random.normal(loc=5, scale=1, size=100)
B = np.random.normal(loc=3, scale=1.5, size=100)
C = np.random.normal(loc=4, scale=2, size=100)

# One column per sample, one row per draw.
df = pd.DataFrame({"A": A, "B": B, "C": C})
df.head(10)
# Kernel density estimate of every raw column, drawn on one set of axes.
df.plot.kde(figsize=(8, 5), title="Before applying quantile normalization")
# Saving the figure is left disabled:
#plt.savefig("before.png", transparent=False)
# In []:
# Quantile normalization
## 1. order the values of each column
# Work on the raw ndarray: argsort yields row *positions*, so they must not be
# used as index labels. This also handles any set of columns without
# repeating the code once per column.
values = df.values

# sort_idx[i, j]: row position in column j of that column's i-th smallest value.
sort_idx = pd.DataFrame(
    np.argsort(values, axis=0), index=df.index, columns=df.columns
)
# orig_idx[i, j]: zero-based rank of the value at row i within column j
# (the argsort of an argsort inverts the sorting permutation).
orig_idx = pd.DataFrame(
    np.argsort(sort_idx.values, axis=0), index=df.index, columns=df.columns
)

# Row i of df_sorted holds the i-th smallest value of every column.
df_sorted = pd.DataFrame(np.sort(values, axis=0), columns=df.columns)
df_sorted.head(10)

## 2. (w/o reference) calculate the median of each row
# medians[i] is the common value that rank i receives in every column.
medians = df_sorted.median(axis=1)
medians.head(10)
## 2. (w/ reference) calculate quantiles of the reference distribution
## in this case, the 101-quantiles of the standard normal distribution
# 101 evenly spaced probabilities k/101 for k = 0..100 (1.0 itself is excluded).
points = np.linspace(0, 1.0, 101, endpoint=False)
# Skip the leading probability 0, whose normal quantile is -inf, so only the
# 100 finite cut points (k = 1..100) are evaluated and kept.
# `stats` is imported explicitly at the top of the file: `import scipy` alone
# does not guarantee that the `scipy.stats` submodule is loaded on every
# SciPy version.
reference_quantile = stats.norm.ppf(points[1:])
## 3 (w/o ref). replace original values with the calculated row medians
# orig_idx holds each value's zero-based rank within its column, so indexing
# the medians positionally with the whole rank table normalizes every column
# at once, whatever the columns are called.
df_normalized = pd.DataFrame(
    medians.values[orig_idx.values],
    columns=orig_idx.columns,
)
df_normalized.head(10)

# Save through the Axes' own figure instead of pyplot's implicit
# "current figure", so the file always contains this plot.
ax = df_normalized.plot(
    kind="kde", figsize=(8, 5), title="After applying quantile normalization"
)
ax.figure.savefig("after.png", transparent=False)
## 3 (w/ ref). replace original values with the reference quantile of the same rank
# orig_idx holds each value's zero-based rank within its column; a value of
# rank r is replaced by reference_quantile[r]. This requires
# reference_quantile to have at least as many entries as there are rows.
df_normalized = pd.DataFrame(
    reference_quantile[orig_idx.values],
    columns=orig_idx.columns,
)
df_normalized.head(10)

# Save through the Axes' own figure instead of pyplot's implicit
# "current figure", so the file always contains this plot.
ax = df_normalized.plot(
    kind="kde", figsize=(8, 5), title="After applying quantile normalization"
)
ax.figure.savefig("after_ref.png", transparent=False)
# In []:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment