Skip to content

Instantly share code, notes, and snippets.

@debboutr
Last active October 24, 2020 06:11
Show Gist options
  • Save debboutr/7e24da9b35f871079ee33a48d9c6228e to your computer and use it in GitHub Desktop.
Save debboutr/7e24da9b35f871079ee33a48d9c6228e to your computer and use it in GitHub Desktop.
pandas operations for stacking data -- jon launspach
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import pandas as pd
# Name of the column that holds the unique ID in every input CSV; each table
# is indexed by it so that rows from different files can be aligned.
uid = "GEO_ID"
# Directory whose CSV files are read (only its direct entries are listed).
directory = "."
def is_csv(x):
    """Return True when the file name *x* ends with a ``.csv`` extension.

    The check is case-insensitive, so ``DATA.CSV`` is accepted. A name
    without a dot (for example a file literally called ``csv``) is rejected:
    splitting on "." and comparing the last piece would wrongly accept it,
    because the whole name is then the last piece.
    """
    return x.lower().endswith(".csv")
# Stack every CSV found in `directory` into one wide table indexed by `uid`,
# then write the combined table to 'bologna.csv' in the working directory.
out_path = os.path.abspath('bologna.csv')
final = None  # becomes the first table read; None means "nothing read yet"
# Sort the listing: os.listdir() returns entries in arbitrary order, and a
# later file overwrites an earlier one wherever ids and columns overlap.
for f in sorted(filter(is_csv, os.listdir(directory))):
    path = os.path.join(directory, f)
    if os.path.abspath(path) == out_path:
        # The output of a previous run sits next to the inputs when
        # `directory` is the working directory; do not read it back in.
        continue
    tbl = pd.read_csv(path).set_index(uid)
    if final is None:
        final = tbl
        continue
    # Add an all-NaN row for every id that `tbl` has but `final` lacks, so
    # the label-based assignment below never meets a missing row label.
    is_new = ~tbl.index.isin(final.index)
    if is_new.any():
        # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
        # Selecting zero columns keeps only the new index labels.
        final = pd.concat([final, tbl.loc[is_new, []]])
    # Aligned write: creates columns `final` does not have yet and overwrites
    # the cells of columns both tables share.
    final.loc[tbl.index, tbl.columns] = tbl
if final is None:
    # No input file was found; still write an (empty) output file.
    final = pd.DataFrame()
final.to_csv('bologna.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment