Last active
November 17, 2019 17:30
-
-
Save sickel/94570897ed12eb1f0bba0c609312ce59 to your computer and use it in GitHub Desktop.
For å lese krysstabeller
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
# Read cross tables ("krysstabeller") from Excel workbooks and merge them
# into a per-plot summary CSV and a raw-data CSV.
from openpyxl import load_workbook
import sys

# Every command-line argument after the program name is an input workbook
# filename; echo each one as it is collected.
filenames = []
for arg in sys.argv[1:]:
    print(arg)
    filenames.append(arg)
# Species-name dictionary workbook: column A holds one name form, column B
# the replacement used for "LAT" sheets.  Build a lookup keyed on the
# lower-cased, stripped column-A value.
dictbook = load_workbook('NO_LAT.xlsx')
dictsheet = dictbook.active
names = {}
for row in range(2, dictsheet.max_row + 1):
    try:
        # .strip() raises AttributeError when the cell is empty (None) or
        # holds a non-text value; those rows are skipped on purpose.
        key_name = dictsheet.cell(row=row, column=1).value.strip().lower()
        val_name = dictsheet.cell(row=row, column=2).value.strip()
    except AttributeError:
        continue
    names[key_name] = val_name
# Accumulators shared by the rest of the script:
#   ruter   -- [flate, polid, poltype] for every sheet that yielded data
#   dataset -- {species: {sheet-key: detection value}}
#   rawdata -- one [flate, polid, poltype, art, det] row per observation
ruter = []
dataset = {}
rawdata = []
for filename in filenames:
    print(filename)
    wb = load_workbook(filename)
    # wb.sheetnames replaces get_sheet_names(), which was deprecated and
    # removed in openpyxl 3.x.
    for sheetname in wb.sheetnames:
        print(sheetname)
        # On sheets named LAT*, species names are mapped through the
        # NO_LAT dictionary loaded above.
        latin = sheetname.startswith('LAT')
        sheet = wb[sheetname]  # replaces removed get_sheet_by_name()
        # A1 is expected to look like "...: <flate>"; a missing/non-text A1
        # raises AttributeError on .split() and the sheet is skipped.
        try:
            parts = sheet["A1"].value.split(':')
        except AttributeError:
            continue
        if len(parts) < 2:
            continue
        flate = parts[1].strip()
        if flate == '':
            continue
        # Polygon id from C1; fall back to the sheet name when missing.
        polid = sheet["C1"].value
        if polid is not None:
            polid = str(polid).strip()
        else:
            print("Unknown polid")
            polid = sheetname
        # Polygon type from C2, falling back to B2, then "Unknown".
        poltype = sheet["C2"].value
        if poltype is None:
            poltype = sheet["B2"].value
        if poltype is not None:
            poltype = poltype.strip()
        else:
            poltype = "Unknown"
        key = "{}_{}_{}".format(flate, polid, poltype)
        print(key)
        ndet = 0  # detections found on this sheet
        # Data rows start at row 3; species/value pairs sit in column pairs
        # (A,B), (C,D), (E,F), (G,H), (I,J).
        for row in range(3, sheet.max_row + 1):
            for col in (1, 3, 5, 7, 9):
                det = sheet.cell(row=row, column=col + 1).value
                if det is None:
                    continue
                det = str(det).strip()
                # 'X'/'x' and 'Å'/'å' both mean "present" -> "1".
                if det.upper() in ('X', 'Å'):
                    det = "1"
                art = sheet.cell(row=row, column=col).value
                if art is None:
                    continue
                if latin:
                    # Translate the name; keep (and report) the original
                    # when no translation exists or the cell is not text.
                    # str(art) avoids the crash the old code had when a
                    # non-string name reached the error print.
                    try:
                        art = names[art.lower()]
                    except (KeyError, AttributeError):
                        print(str(art) + " missing")
                if art not in dataset:
                    dataset[art] = {}
                dataset[art][key] = det
                ndet += 1
                workflate = flate  # remembered for the output filenames
                rawdata.append([flate, polid, poltype, art, det])
        if ndet > 0:
            ruter.append([flate, polid, poltype])
# Sorted list of every species seen across all sheets.
arter = list(dataset.keys())
arter.sort()
print(ruter)
# Report sheet keys (flate.polid.poltype) that occur more than once --
# duplicates would collide in the summary table.  A key is appended to
# dupes the first time a repeat is seen, so each duplicate appears once.
seen = {}
dupes = []
for rute in ruter:
    tag = '.'.join(rute)
    if tag not in seen:
        seen[tag] = 1
    else:
        if seen[tag] == 1:
            dupes.append(tag)
        seen[tag] += 1
print("duplicates:")
print(dupes)
# Write the cross table: one column per sheet (rute), one row per species.
# NOTE(review): workflate is only bound if at least one observation was
# read above; with no data this raises NameError, as before.
outfilename = 'summary_' + workflate + '.csv'
print(outfilename)
starts = ['Flatenr:', 'ID nr:', 'Arealkode']
joinchar = ';'
# 'with' guarantees the file is closed even if a write raises.
with open(outfilename, 'w', encoding='ISO-8859-1') as csvfile:
    # Three header rows: flate number, polygon id, area code.
    for idx, start in enumerate(starts):
        line = [start]
        for r in ruter:
            line.append(r[idx])
        csvfile.write(joinchar.join(line) + "\n")
    # One row per species; blank cell where the species was not recorded
    # on that sheet.
    for art in arter:
        line = [art]
        for r in ruter:
            key = "{}_{}_{}".format(r[0], r[1], r[2])
            if key in dataset[art]:
                line.append(str(dataset[art][key]))
            else:
                line.append('')
        csvfile.write(joinchar.join(line) + "\n")
# Raw observations: one line per (flate, polygon, type, species, value).
# NOTE(review): opened with the platform default encoding, unlike the
# ISO-8859-1 summary file -- confirm whether that difference is intended.
rfilename = 'rawdata_' + workflate + '.csv'
with open(rfilename, 'w') as rfile:
    # Header row.  (A stray pasted URL that had been fused onto the
    # newline in the original write is removed here.)
    rfile.write(joinchar.join(['flate', 'polygon', 'type', 'art', 'sett']) + '\n')
    for r in rawdata:
        rfile.write(joinchar.join(r) + "\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment