Skip to content

Instantly share code, notes, and snippets.

@finswimmer
Last active June 16, 2018 15:03
Show Gist options
  • Save finswimmer/6ac3a5d9b78dd4a31b283cba29d9eec9 to your computer and use it in GitHub Desktop.
Save finswimmer/6ac3a5d9b78dd4a31b283cba29d9eec9 to your computer and use it in GitHub Desktop.
Join multiple pandas dataframes
import glob
import sys
import pandas
def read_filenames(names):
    """Yield input file paths, expanding arguments that contain ``*``.

    Shells normally expand wildcards themselves; this covers the case where
    a pattern reaches the script unexpanded (quoted argument, Windows shell).

    Args:
        names: Iterable of command-line arguments (paths or glob patterns).

    Yields:
        Each argument without ``*`` unchanged, and every path matching each
        argument that contains ``*``. A pattern that matches nothing yields
        nothing.
    """
    for arg in names:
        if "*" in arg:
            # glob.glob() returns matches in arbitrary (filesystem) order;
            # sort them so the resulting column order is reproducible.
            yield from sorted(glob.glob(arg))
        else:
            yield arg
def load_sample(path):
    """Read one headerless, tab-separated sample file into a DataFrame.

    The first three columns (named ``chr``, ``strand``, ``coord``) become
    the row index; the fourth column is named after ``path`` so that each
    sample keeps its own column once the frames are joined.

    Args:
        path: Path of the sample file to read.

    Returns:
        A DataFrame indexed by (chr, strand, coord) with one column, ``path``.
    """
    return pandas.read_csv(
        path,
        sep="\t",
        header=None,
        names=["chr", "strand", "coord", path],
        index_col=[0, 1, 2],
    )


def join_samples(frames):
    """Outer-join sample frames on their index and prepend a row total.

    Args:
        frames: Non-empty list of DataFrames sharing the same index levels.

    Returns:
        A new DataFrame holding every input column, preceded by a
        ``scoreSum`` column with the row-wise sum (missing values are
        skipped by ``DataFrame.sum``).

    Raises:
        ValueError: If ``frames`` is empty.
    """
    if not frames:
        raise ValueError("no input frames to join")
    first, *rest = frames
    # With a single input there is nothing to join; copy so that inserting
    # the total column below does not modify the caller's frame.
    joined = first.join(rest, how="outer") if rest else first.copy()
    score_sum = joined.sum(axis=1)
    joined.insert(0, "scoreSum", score_sum)
    return joined


def main():
    """Join the sample files named on the command line and print a TSV.

    Missing values are written as ``0`` and floats without decimals.
    Exits with an error message when no input file is given.
    """
    frames = [load_sample(path) for path in read_filenames(sys.argv[1:])]
    if not frames:
        sys.exit("error: no input files given (or the pattern matched nothing)")
    joined = join_samples(frames)
    # Write the CSV text directly: print() would append one more newline
    # after the final record and leave a blank line at the end of the output.
    sys.stdout.write(joined.to_csv(sep="\t", na_rep="0", float_format="%.0f"))


if __name__ == "__main__":
    main()
@finswimmer
Copy link
Author

Answer to a Biostars question: https://www.biostars.org/p/317165

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment