Skip to content

Instantly share code, notes, and snippets.

@gokulraja
Last active March 27, 2020 15:27
Show Gist options
  • Save gokulraja/c4bee6740f6e307a882a04fff1b9cff5 to your computer and use it in GitHub Desktop.
Save gokulraja/c4bee6740f6e307a882a04fff1b9cff5 to your computer and use it in GitHub Desktop.
import openpyxl
import pandas as pd
import json
import ast
wb = openpyxl.load_workbook(r'C:\Users\Ausu\Documents\C(Sharp) Projects\Sample_JsonGenMetadata.xltm')
def getSheetNameTable():
listofSheetName = []
worksheet = wb["Metadata"]
for tbl in worksheet._tables:
if(tbl.name == "sheetName"):
cellAddress = tbl.ref
data_rows =[]
for row in worksheet[cellAddress]:
data_cols = []
for cell in row:
data_cols.append(cell.value)
data_rows.append(data_cols)
for i in range(1,len(worksheet[cellAddress])):
for item in worksheet[cellAddress][i]:
listofSheetName.append(item.value)
return listofSheetName
def getMetaDataTableToDictionary():
requiredSheets = getSheetNameTable()
ws = wb["Metadata"]
for tbl in ws._tables:
if(tbl.name == "tbl_Metadata"):
cellAddress = tbl.ref
data_rows = []
for row in ws[cellAddress]:
data_cols = []
for cell in row:
data_cols.append(cell.value)
data_rows.append(data_cols)
df = pd.DataFrame(data_rows)
columnNames = df.iloc[0]
df = df[1:]
df.columns = columnNames
getDf = df[df.SheetName.isin(requiredSheets)]
for SheetName, df_sheetName in getDf.groupby('SheetName'):
sheetName = df_sheetName.iat[0,0]
print(df_sheetName.iat[0,0])
getJson(df_sheetName,sheetName)
def getIgnoreColumnList(columnIgnoreCell,totalLengthOfColumn):
totalcolumnNumber = []
totalcolumnNumber = [x for x in range(totalLengthOfColumn)]
if columnIgnoreCell is None:
ignore = totalcolumnNumber
return ignore
else:
if(isinstance(columnIgnoreCell, int)):
ignore = [columnIgnoreCell]
else:
test_ignore = columnIgnoreCell.split(",")
ignore = [int(i) for i in test_ignore]
totalcolumnNumber = set(totalcolumnNumber)
ignore = set(ignore)
return list(totalcolumnNumber - ignore)
def getJson(dfOfSheet,sheetName):
totalNameRanges = len(dfOfSheet.index)
overallDict = {}
overallDict["sheetName"] = str(sheetName)
for nameRang in range(totalNameRanges):
if(dfOfSheet.iat[nameRang,5] == "Yes"):
dictJson = {}
worksheet = wb[dfOfSheet.iat[nameRang,0]]
cellAddress = dfOfSheet.iat[nameRang,2]+":"+dfOfSheet.iat[nameRang,3]
for i in range(0,len(worksheet[cellAddress])):
# dictJson["RowNo"] = 1
listValue = []
dummyList = []
for item in worksheet[cellAddress][i]:
dummyList.append(str(item.value))
acceptedCol = getIgnoreColumnList(dfOfSheet.iat[nameRang,4],len(dummyList))
for actual in range(1,len(acceptedCol)):
listValue.append(str(dummyList[acceptedCol[actual]]))
dictJson[dummyList[0]] = json.dumps(listValue)
dictJson[dummyList[0]] = ast.literal_eval(dictJson[dummyList[0]])
listNew = [dictJson]
# print(listNew)
overallDict[dfOfSheet.iat[nameRang,1]+"("+str(dfOfSheet.iat[nameRang,4])+")"] = listNew
jsonStr = json.dumps(overallDict).replace("],","]},{")
if(nameRang):
jsonStr = jsonStr.replace("]}]},{","]}],")
# for rowno in range(1,len(worksheet[cellAddress])):
# print(len(worksheet[cellAddress]))
# rowN = rowno+1
# jsonStr = jsonStr.replace("},{",'"RowNo":'+str(rowN)+",")
# jsonStr = jsonStr.replace(']"',']},{"')
print(jsonStr)
getMetaDataTableToDictionary()
#v2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment