Skip to content

Instantly share code, notes, and snippets.

@poltys
Last active September 15, 2020 09:20
Show Gist options
  • Save poltys/13b9b432e15786035a80a7d265c3e475 to your computer and use it in GitHub Desktop.
Save poltys/13b9b432e15786035a80a7d265c3e475 to your computer and use it in GitHub Desktop.
In a given folder read all json files and map the columns in a dataframe
# For every sub-folder of a given path, read all JSON files in it and record, per file, the labels pandas finds, in a DataFrame.
import pandas as pd
import os, json
import glob
import numpy as np
def read_path(path, *, write_excel=True):
    """Map each JSON file directly inside *path* to the labels read from it.

    Every ``*.json`` file in *path* (non-recursive) is loaded with
    ``pandas.read_json``. For each file one row is produced holding the
    file name as returned by ``glob`` and the list of index labels of the
    loaded frame (equivalent to the columns of the transposed frame).

    Args:
        path: Folder to scan. It is also used as the name of the first
            column of the result and as the prefix of the output file.
        write_excel: When true (the default) the mapping is also written to
            ``path + '-mapping.xlsx'``, i.e. next to the folder rather than
            inside it. NOTE(review): ``DataFrame.to_excel`` presumably needs
            an Excel writer engine to be installed -- confirm for your
            environment.

    Returns:
        A DataFrame with two columns, ``path`` and ``'list_column'``, one
        row per JSON file, ordered by file name. It is empty (but keeps
        both columns) when the folder contains no JSON files.

    Raises:
        Whatever ``pandas.read_json`` raises for a file it cannot parse;
        errors are deliberately not swallowed.
    """
    # glob yields names in arbitrary order; sort so the rows (and the
    # spreadsheet) come out the same on every run.
    json_files = sorted(glob.glob(os.path.join(path, "*.json")))

    records = []
    for filename in json_files:
        # Only the labels are needed, so each frame is dropped right after
        # reading instead of keeping every file's data in memory.
        frame = pd.read_json(filename)
        records.append((filename, frame.index.to_list()))

    df = pd.DataFrame.from_records(records, columns=[path, 'list_column'])
    if write_excel:
        df.to_excel(path + '-mapping.xlsx')
    return df
if __name__ == "__main__":
    # Root directory to scan. Every sub-directory below it (at any depth) is
    # passed to read_path(); JSON files sitting directly in the root itself
    # are not processed, because only the directory names yielded by
    # os.walk are collected.
    path = ""  # input your path
    if not path:
        raise SystemExit("Set `path` to the root folder to scan before running this script.")

    # Build the complete folder list before processing anything:
    # read_path() writes '<folder>-mapping.xlsx' files into the tree that is
    # being walked.
    folders = [
        os.path.join(root, dirname)
        for root, dirnames, _filenames in os.walk(path)
        for dirname in dirnames
    ]

    # Show what will be processed, then process it.
    for folder in folders:
        print(folder)
    for folder in folders:
        read_path(folder)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment