Skip to content

Instantly share code, notes, and snippets.

@percolator
Last active October 20, 2017 13:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save percolator/fba4574f877b3aa29ae5e40d35f919c1 to your computer and use it in GitHub Desktop.
Save percolator/fba4574f877b3aa29ae5e40d35f919c1 to your computer and use it in GitHub Desktop.
A sniplet generating data simulating a high throughput expression analysis experiment
import numpy as np
import numpy.random as npr
import pandas as pd
def generate_expression_data(n_analytes=100, n_samples=2, n_replicates=3, p_regulated=0.2, mean_offset=3.0, var=0.2, diff_var=2.0):
# Here we follow a convension, The first sample is the reference i.e. have all label 1
labels = npr.binomial(1, p_regulated, (n_analytes,n_samples-1))
template = np.hstack((np.zeros((n_analytes,1)),labels))
# We expand the template labels into several replicates
regulated = np.repeat(template,n_replicates, axis=1)
# If the reading is regulated, offset it with a random offset sampled from the normal distribution
offset = regulated*npr.normal(0,diff_var,(n_analytes,1))
# Model a differentexpression level for the different analytes
expr_level = np.ones((n_analytes,n_samples*n_replicates))*npr.normal(mean_offset,mean_offset,(n_analytes,1))
# add noice for each measurement
expression = npr.normal(offset+expr_level,var,(n_analytes,n_replicates*n_samples))
expression = 2**expression
analyte_names = ["a"+str(i+1) for i in range(n_analytes)]
sample_names = ["s"+str(i+1)+'_'+str(j+1) for i in range(n_samples) for j in range(n_replicates)]
# Create a dataframe for expression values
expr_df = pd.DataFrame(expression,columns=sample_names,index=analyte_names)
expr_df.loc["Sample",:] = [i+1 for i in range(n_samples) for j in range(n_replicates)]
# Create a dataframe with answers if the reading was modeled as differential or not
label_df = pd.DataFrame(template,columns=[i+1 for i in range(n_samples)],index=analyte_names)
return expr_df,label_df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment