Skip to content

Instantly share code, notes, and snippets.

@gpratt
Last active August 29, 2015 13:57
Show Gist options
  • Save gpratt/9817854 to your computer and use it in GitHub Desktop.
Save gpratt/9817854 to your computer and use it in GitHub Desktop.
cluster_loading.py
# Build a table of peak-caller output files: one column per sample, one row
# per peak-calling method, each cell holding the path of the matching .bed file.
from collections import defaultdict
import glob
import itertools
import os

import pandas as pd

analysis_dir = "/nas3/gpratt/projects/public_clip/analysis/v5"

# Maps the experiment-type token found in a file name (the dot-separated
# field just before the ".bed" extension) to the label used as the row key.
# Both "bed_notrim_ingenes_clusters_*" variants share the "kasey" label.
experiment_mapping = {
    "peaks": "clipper",
    "ripseeker": "ripseeker",
    "pyicoclip": "pyicoclip",
    "piranha": "piranha",
    "bed_notrim_ingenes_clusters_hg1950": "kasey",
    "bed_notrim_ingenes_clusters_mm950": "kasey",
}

peak_files = glob.glob(os.path.join(analysis_dir, "*.bed"))

# The sample name is everything before the first "." of the file's base name.
# groupby only merges adjacent items, hence the sort beforehand.
samples = itertools.groupby(
    sorted(peak_files),
    lambda x: os.path.basename(x).split(".")[0],
)

sample_dict = defaultdict(dict)
for sample, experiments in samples:
    for experiment in experiments:
        # Second-to-last dot-separated field of the base name, i.e. the token
        # directly in front of the "bed" extension.
        experiment_type = os.path.basename(experiment).split(".")[-2]
        # An experiment type missing from experiment_mapping raises KeyError.
        sample_dict[sample][experiment_mapping[experiment_type]] = experiment

# Outer keys (samples) become columns; inner keys (method labels) become rows.
sample_df = pd.DataFrame(sample_dict)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment