gpratt/gist:9817854

## gistfile1.txt
# Directory that is scanned (non-recursively) for "*.bed" files.
analysis_dir = "/nas3/gpratt/projects/public_clip/analysis/v5"

# Translates the second-to-last dot-separated field of a BED file name into
# the label that file is stored under. Two different fields share the
# "kasey" label.
experiment_mapping = {
    "peaks": "clipper",
    "ripseeker": "ripseeker",
    "pyicoclip": "pyicoclip",
    "piranha": "piranha",
    "bed_notrim_ingenes_clusters_hg1950": "kasey",
    "bed_notrim_ingenes_clusters_mm950": "kasey",
}


def _sample_name(bed_path):
    """Return the part of the file name that precedes its first dot."""
    return os.path.basename(bed_path).split(".", 1)[0]


peak_files = glob.glob(os.path.join(analysis_dir, "*.bed"))

# groupby only merges adjacent items, so the paths are sorted before grouping.
samples = itertools.groupby(sorted(peak_files), _sample_name)

# sample name -> {label: path of the BED file carrying that label}
sample_dict = defaultdict(dict)
for sample, experiments in samples:
    files_by_label = sample_dict[sample]
    for experiment in experiments:
        # The field right before the ".bed" extension selects the label;
        # a field missing from experiment_mapping raises KeyError.
        experiment_type = os.path.basename(experiment).rsplit(".", 2)[-2]
        files_by_label[experiment_mapping[experiment_type]] = experiment

# Outer keys (samples) become columns, inner keys (labels) become the index.
sample_df = pd.DataFrame(sample_dict)
	# Directory whose "*.bed" files are collected below.
	analysis_dir = "/nas3/gpratt/projects/public_clip/analysis/v5"

	# File-name tag (second-to-last dot-separated field of the file name) ->
	# label used as the key for that file within its sample.
	experiment_mapping = {
		"peaks": "clipper",
		"ripseeker": "ripseeker",
		"pyicoclip": "pyicoclip",
		"piranha": "piranha",
		"bed_notrim_ingenes_clusters_hg1950": "kasey",
		"bed_notrim_ingenes_clusters_mm950": "kasey",
	}

	peak_files = glob.glob(os.path.join(analysis_dir, "*.bed"))
	# groupby only merges adjacent items, so the paths are sorted first; the
	# group key is the part of the file name before its first dot.
	samples = itertools.groupby(sorted(peak_files), lambda x: os.path.basename(x).split(".")[0])

	# sample name -> {label: path of the matching BED file}
	sample_dict = defaultdict(dict)
	for sample, experiments in samples:
		for experiment in experiments:
			# The field right before the ".bed" extension picks the label;
			# a tag that is not in experiment_mapping raises KeyError.
			experiment_type = os.path.basename(experiment).split(".")[-2]
			sample_dict[sample][experiment_mapping[experiment_type]] = experiment

	# Outer keys (samples) become columns, inner keys (labels) become the index.
	sample_df = pd.DataFrame(sample_dict)