Skip to content

Instantly share code, notes, and snippets.

@sickel
Last active November 17, 2019 17:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sickel/94570897ed12eb1f0bba0c609312ce59 to your computer and use it in GitHub Desktop.
Save sickel/94570897ed12eb1f0bba0c609312ce59 to your computer and use it in GitHub Desktop.
For å lese krysstabeller
#!/usr/bin/python3
from openpyxl import load_workbook
import sys
filenames=[]
for i in range(1,len(sys.argv)):
print(sys.argv[i])
filenames.append(sys.argv[i])
dictbook=load_workbook('NO_LAT.xlsx')
dictsheet=dictbook.active
names={}
for row in range(2,dictsheet.max_row+1):
try:
names[dictsheet.cell(row=row,column=1).value.strip().lower()]=dictsheet.cell(row=row,column=2).value.strip()
except:
pass
ruter=[]
dataset={}
rawdata=[]
for filename in filenames:
print(filename)
wb=load_workbook(filename)
sheets=wb.get_sheet_names()
for sheetname in sheets:
#if not (sheetname.startswith('NO') or sheetname.startswith('LAT')):
# continue
print(sheetname)
latin=sheetname.startswith('LAT')
sheet = wb.get_sheet_by_name(sheetname)
try:
parts=sheet["A1"].value.split(':')
except:
continue
if len(parts)<2:
continue
flate=parts[1].strip()
if flate=='':
continue
polid=sheet["C1"].value
if polid != None:
polid=str(polid).strip()
else:
print ("Unknown polid")
polid=sheetname
poltype=sheet["C2"].value
if poltype!=None:
poltype=poltype.strip()
else:
poltype=sheet["B2"].value
if poltype!=None:
poltype=poltype.strip()
else:
poltype="Unknown"
key="{}_{}_{}".format(flate,polid,poltype)
print(key)
last=sheet.max_row+1
row=3
ndet=0
for row in range(row,last):
for col in (1,3,5,7,9):
det=sheet.cell(row=row,column=col+1).value
if det == None:
continue
det=str(det).strip()
if det.upper()=='X' or det.upper()=='Å':
det="1"
art=sheet.cell(row=row,column=col).value
if art==None:
continue
if latin:
try:
art=names[art.lower()]
except:
print(art+" missing")
pass
# print(art)
if not art in dataset.keys():
dataset[art]={}
dataset[art][key]=det
ndet+=1
workflate=flate
rawdata.append([flate,polid,poltype,art,det])
if ndet>0:
ruter.append([flate,polid,poltype])
arter=list(dataset.keys())
arter.sort()
#print(dataset)
#print(arter)
print(ruter)
#print(latin)
seen = {}
dupes = []
for a in ruter:
x='.'.join(a)
if x not in seen:
seen[x] = 1
else:
if seen[x] == 1:
dupes.append(x)
seen[x] += 1
print("duplicates:")
print(dupes)
outfilename='summary_'+workflate+'.csv'
csvfile=open(outfilename,'w',encoding='ISO-8859-1')
print(outfilename)
starts=['Flatenr:','ID nr:','Arealkode']
joinchar=';'
for idx,start in enumerate(starts):
line=[start]
for r in ruter:
line.append(r[idx])
#print(line)
csvfile.write(joinchar.join(line)+"\n")
for art in arter:
line=[art]
for r in ruter:
key="{}_{}_{}".format(r[0],r[1],r[2])
if key in dataset[art].keys():
line.append(str(dataset[art][key]))
else:
line.append('')
#print(line)
csvfile.write(joinchar.join(line)+"\n")
csvfile.close()
rfilename='rawdata_'+workflate+'.csv'
rfile=open(rfilename,'w')
rfile.write(joinchar.join(['flate','polygon','type','art','sett'])+'https://www.aftenposten.no/\n')
for r in rawdata:
rfile.write(joinchar.join(r)+"\n")
rfile.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment