Created
February 6, 2018 09:23
-
-
Save ebergam/604d13a11902e8c89ddfd3855844f7cc to your computer and use it in GitHub Desktop.
Non-numeric pivot table reading data chunk by chunk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Beware of the Panda. | |
### 'You have no idea how tidy this is gonna get' | |
from pandas import * | |
import csv | |
import numpy as np | |
df = pandas.read_csv(‘data_raw.csv’, sep=” “, chunksize=5000) | |
df | |
appended_data = [] | |
for chunk in df: | |
pivot_table = chunk.pivot_table(index=[‘id’], | |
columns=[‘key’], | |
values=[‘value’], | |
aggfunc=lambda x: ‘ ‘.join(str(v) for v in x)) | |
appended_data.append(pivot_table) | |
appended_data = pandas.concat(appended_data, axis=0).reset_index() | |
appended_data.to_csv('data_clean.csv', sep=",") | |
#and if you wanna clean it a little bit where the chunk trunks it: | |
appended_data_clean = appended_data.groupby('id', sort=True).agg(np.sum) | |
appended_data_clean.to_csv('dati_clean_crunched.csv', sep=",") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment