Skip to content

Instantly share code, notes, and snippets.

@Jim-Holmstroem
Last active March 6, 2021 16:44
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Jim-Holmstroem/f1bb7c7170846fdfb01b65f1dd30a4e3 to your computer and use it in GitHub Desktop.
Save Jim-Holmstroem/f1bb7c7170846fdfb01b65f1dd30a4e3 to your computer and use it in GitHub Desktop.
fix parquet read format to be pandas-like
from itertools import repeat
import pandas as pd
# Sample input: two columns holding lists of {'key', 'value'} records, one
# scalar column, and one column of single-entry dicts carrying the event id.
df = pd.DataFrame(
    dict(
        accountData=[
            [dict(key='name', value='jim'), dict(key='schlong', value='27')],
            [dict(key='name', value='cnagy'), dict(key='schlong', value='26')],
        ],
        additionalData=[
            [dict(key='id', value='jimbo'), dict(key='flash', value='yes')],
            [dict(key='id', value='cnagius'), dict(key='flash', value='no')],
        ],
        meta=[1, 0],
        eventIdThing=[
            dict(somewhere=13),
            dict(somewhere=37),
        ],
    ),
)

# Re-index the rows by the number stored under 'somewhere' in each
# eventIdThing dict, and label that index 'EventId'.
df.index = pd.Index(
    [event['somewhere'] for event in df['eventIdThing']],
    name='EventId',
)
def dic(d):
    """Collapse an iterable of key/value records into a plain dict.

    Args:
        d: Iterable of records, each subscriptable by 'key' and by 'value'
            (e.g. ``{'key': 'name', 'value': 'jim'}``).

    Returns:
        A dict mapping each record's 'key' entry to its 'value' entry.
        When the same key appears more than once, the last record wins.

    Raises:
        KeyError: If a dict record has no 'key' or no 'value' entry.
    """
    return {kv['key']: kv['value'] for kv in d}
# Expand each key/value column into its own sub-frame (one column per key)
# and stitch them side by side next to the untouched 'meta' column. Passing
# a dict to concat labels each piece with its dict key on the column axis.
df = pd.concat(
    {
        **{
            column: df[column].map(dic).apply(pd.Series)
            for column in ('accountData', 'additionalData')
        },
        'meta': df['meta'],
    },
    axis=1,
)

# Transposed view of the same data, for when per-event columns are handier.
tdf = df.transpose()
def render(expr):
    """Print a banner naming *expr*, then the printed value of *expr*.

    Args:
        expr: Source text of a Python expression. It is shown verbatim in
            the banner and spliced into a ``print(...)`` call that is
            executed, so names in it resolve against this module's globals.

    Returns:
        None. The banner, the printed value and a trailing blank line are
        written to standard output.

    NOTE: the text is executed with ``exec``; pass only trusted,
    hard-coded strings, never external input.
    """
    print(f'-----< {expr} >-----')
    # Taking the expression as text lets one string serve as both the
    # banner label and the code to run.
    exec(f'print({expr})')
    print()
# Print the reshaped frame and a few lookups into it. render() is called
# only for its printed output, so a plain loop is used instead of building
# a throwaway list of None results.
for expr in (
    'df',
    'df.accountData.name',
    'df.accountData.name[13]',
    'tdf',
    'tdf[13]',
):
    render(expr)
@Jim-Holmstroem
Copy link
Author

-----<  df  >-----
        accountData         additionalData       meta
               name schlong             id flash meta
EventId
13              jim      27          jimbo   yes    1
37            cnagy      26        cnagius    no    0

-----<  df.accountData.name  >-----
EventId
13      jim
37    cnagy
Name: name, dtype: object

-----<  df.accountData.name[13]  >-----
jim

-----<  tdf  >-----
EventId                    13       37
accountData    name       jim    cnagy
               schlong     27       26
additionalData id       jimbo  cnagius
               flash      yes       no
meta           meta         1        0

-----<  tdf[13]  >-----
accountData     name         jim
                schlong       27
additionalData  id         jimbo
                flash        yes
meta            meta           1
Name: 13, dtype: object


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment