Last active
July 2, 2024 14:40
-
-
Save spirrobe/04ccc1a6ac051bd0baabb59258752a18 to your computer and use it in GitHub Desktop.
This code contains functionality to fetch data from the Basler Atlas with Python, either for direct use or for saving as a CSV file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import datetime | |
# HTTP headers sent along with every request issued by the functions in this
# module; only a User-Agent is set.
headers = {
    'User-Agent': 'Mozilla/5.0',
}
def data2csv(data,
             outpath='./',
             prefix='',
             suffix='',
             filesep='_',
             datasep=',',
             ext='.csv',
             quiet=True,
             ):
    """Write one dataset, or a (nested) dict of datasets, to text files.

    A dataset is a dict with the keys ``'data'`` (list of rows), ``'header'``
    (list of column names) and ``'meta'`` (dict providing ``'theme'``,
    ``'subtheme'`` and ``'nivgeo'``). A dict without a ``'data'`` key is
    treated as a collection: each of its values is written recursively.

    Parameters
    ----------
    data : dict
        A single dataset or a dict of datasets.
    outpath : str
        Target directory; ``~`` is expanded and the directory is created
        if it does not exist.
    prefix, suffix : str
        Optional parts put in front of / behind the generated file name.
    filesep : str
        Separator between the parts of the file name.
    datasep : str
        Separator between the columns inside the file.
    ext : str
        File extension (including the dot).
    quiet : bool
        If False, print the path of every file that is written.

    Returns
    -------
    str or list of str
        The path of the written file for a single dataset, or a flat list
        of paths for a collection.
    """
    import os

    if isinstance(data, dict) and 'data' not in data:
        written = []
        for entry in data.values():
            result = data2csv(entry,
                              outpath=outpath,
                              prefix=prefix,
                              # the suffix has to be handed down as well,
                              # otherwise nested datasets lose it
                              suffix=suffix,
                              filesep=filesep,
                              datasep=datasep,
                              ext=ext,
                              quiet=quiet,
                              )
            if isinstance(result, list):
                written.extend(result)
            else:
                written.append(result)
        return written

    outpath = os.path.expanduser(outpath)

    filename = filesep.join([prefix,
                             data['meta']['theme'],
                             data['meta']['subtheme'],
                             _nivgeo2nicename(data['meta']['nivgeo']),
                             suffix,
                             ])
    # an empty prefix/suffix leaves a dangling separator at the ends
    filename = filename.lstrip(filesep).rstrip(filesep) + ext

    # os.path.join also works when outpath has no trailing path separator
    target = os.path.join(outpath, filename)

    # os.makedirs('') raises, an empty outpath means the current directory
    if outpath:
        os.makedirs(outpath, exist_ok=True)

    if not quiet:
        print('Saving as', target)

    # fixed encoding so that the output does not depend on the platform
    with open(target, 'w', encoding='utf-8') as file_obj:
        file_obj.write(datasep.join(data['header']) + '\n')
        for line in data['data']:
            file_obj.write(datasep.join(map(str, line)) + '\n')

    return target
| def _get_topic_url(): | |
| url = 'https://www.basleratlas.ch/GC_listIndics.php?lang=de' | |
| return url | |
| def _get_data_url(): | |
| # used to be the first url at around 2018 | |
| # "https://www.basleratlas.ch/GC_ronds.php?lang=de&js=1&indic=bevstruk.alter0&nivgeo=wge&ts=1&serie=2015" | |
| url = 'https://www.basleratlas.ch/GC_ronds.php' | |
| url = 'https://www.basleratlas.ch/GC_indic.php' | |
| return url | |
| def _get_references_url(): | |
| # e.g. https://www.basleratlas.ch/GC_refdata.php?nivgeo=wbl&extent=extent1&lang=de | |
| url = 'https://www.basleratlas.ch/GC_refdata.php' | |
| return url | |
def get_ref_data(nivgeo, slimmed=True, timeout=60):
    """Download the reference (territory) data of one geographical level.

    Parameters
    ----------
    nivgeo : str
        Code of the geographical level, sent as the ``nivgeo`` query
        parameter (e.g. ``'wbl'``).
    slimmed : bool
        If True return only the ``'libgeo'`` and ``'codgeo'`` entries,
        otherwise the complete ``territories`` object of the response.
    timeout : float or None
        Timeout in seconds handed to ``requests.get``. Without a timeout a
        stalled connection would block forever; pass None to wait
        indefinitely.

    Returns
    -------
    tuple or dict
        ``(libgeo, codgeo)`` if ``slimmed`` is True, else the whole
        ``territories`` dict.
    """
    import requests

    payload = {'nivgeo': nivgeo,
               'lang': 'de',
               'extent': 'extent1'}

    refs = requests.get(_get_references_url(),
                        params=payload,
                        headers=headers,
                        timeout=timeout)

    # contains the actual numbering in the main geographical administrative
    # dataset that it can be linked against
    refdata = refs.json()['content']['territories']

    if slimmed:
        # libgeo holds the names, codgeo the ids of the territories
        return refdata['libgeo'], refdata['codgeo']
    return refdata
| def _replace_error_value(value, error_value, replace_value): | |
| return replace_value if value <= error_value else value | |
def get_basler_atlas_data(nivgeo,
                          theme,
                          subtheme,
                          year,
                          quiet=True,
                          error_value=-9999,
                          replace_value=0,
                          empty_value='',
                          timeout=60,
                          ):
    """Download the values of one indicator for one year and one level.

    Parameters
    ----------
    nivgeo : str
        Code of the geographical level (see ``_nivgeo2map``).
    theme, subtheme : str
        Dataset and indicator identifier, sent as ``dataset`` / ``indic``.
    year : int or None
        Year to request; sent as ``filters=jahr=<year>`` unless None.
    quiet : bool
        If False, print progress and request failures.
    error_value, replace_value
        Values at or below ``error_value`` are replaced by
        ``replace_value``.
    empty_value
        Replacement for values that are reported as ``None``; those
        cannot be compared against ``error_value``.
    timeout : float or None
        Timeout in seconds handed to ``requests.get``; None waits forever.

    Returns
    -------
    tuple
        ``(keys, rows)``. If the response holds a dict of value lists,
        ``keys`` is the list of its keys and ``rows`` a list of lists (one
        inner list per position, one entry per key). If it holds a plain
        list, ``keys`` is None and ``rows`` a list of strings.
        ``(None, None)`` if the request failed, the response could not be
        interpreted or no values were reported.
    """
    import requests

    def _clean(value):
        # None cannot be ordered against a number, handle it up front
        if value is None:
            return empty_value
        return _replace_error_value(value, error_value, replace_value)

    payload = _params2payload(nivgeo, theme, subtheme, year=year)

    response = requests.get(_get_data_url(),
                            params=payload,
                            headers=headers,
                            timeout=timeout)

    if not response.ok:
        if not quiet:
            print(f'Request for {payload} failed')
        return None, None

    try:
        data = response.json()['content']['distribution']
    except (ValueError, KeyError, TypeError):
        # ValueError covers the JSON decode errors of all requests versions,
        # KeyError/TypeError an answer without the expected structure
        print(f'issues with {response.url} and the payload {payload}')
        return None, None

    if not quiet:
        print(f'Got data from {response.url}, transforming ...')

    values = data['values']

    if isinstance(values, dict):
        # several series at once: turn the columns into rows
        keys = list(values)
        if not keys:
            return None, None
        nrows = len(values[keys[0]])
        rows = [[_clean(values[key][i]) for key in keys]
                for i in range(nrows)]
        return keys, rows

    if 'sortIndices' in data:
        # reported values are to be sorted, usual case?
        indices = data['sortIndices']
    else:
        # reported values are sorted already, usual case for Bezirk?
        indices = range(len(values))

    # NOTE(review): the indices are sorted before use, so the values are
    # read in ascending index order - kept as in the original code
    return None, [str(_clean(values[i])) for i in sorted(indices)]
| def _nivgeo2map(nivgeo): | |
| lookuptable = {'block': 'map5', | |
| 'wbl': 'map5', | |
| 'bezirk': 'map6', | |
| 'wbe': 'map6', | |
| 'viertel': 'map2', | |
| 'wvi': 'map2', | |
| 'gemeinde': 'map3', | |
| 'wge': 'map3', | |
| # 'wahlkreis': 'map7', | |
| # 'pwk': 'map7', | |
| } | |
| return lookuptable[nivgeo] | |
| def _get_nivgeos(): | |
| return 'wbe', 'wvi', 'wge', 'wbl' # , 'pwk' | |
def _nicename2nivgeo(nivgeo=None):
    """Return a mapping from readable level names to level codes.

    Without an argument the complete inverse of ``_nivgeo2nicename()`` is
    returned; with a level code only the single entry
    ``{readable name: nivgeo}``.
    """
    looked_up = _nivgeo2nicename(nivgeo=nivgeo)
    if nivgeo is not None:
        # looked_up is the readable name of the requested code
        return {looked_up: nivgeo}
    # looked_up is the complete code -> name table, turn it around
    return {nice: code for code, nice in looked_up.items()}
| def _nivgeo2nicename(nivgeo=None): | |
| names = {'wbe': 'bezirk', | |
| 'wvi': 'viertel', | |
| 'wge': 'gemeinde', | |
| 'wbl': 'block', | |
| # 'pwk': 'wahlkreis', | |
| } | |
| if nivgeo is None: | |
| return names | |
| else: | |
| return names[nivgeo] | |
def _params2payload(nivgeo,
                    theme,
                    subtheme,
                    year=None):
    """Assemble the query parameters for a request to the data endpoint.

    ``theme`` is sent as ``dataset``, ``subtheme`` as ``indic`` and the
    geographical level as the ``view`` it maps to. A year filter
    (``filters=jahr=<year>``) is only added when ``year`` is given.
    """
    payload = dict(lang='de',
                   dataset=theme,
                   indic=subtheme,
                   view=_nivgeo2map(nivgeo),
                   )
    if year is None:
        return payload
    payload['filters'] = 'jahr=' + str(year)
    return payload
def get_basler_atlas(start_year=1998,
                     end_year=None,
                     themes=None,
                     geographical_levels='all',
                     error_value=-9999,
                     replace_value=0,
                     testing=False,
                     quiet=True,
                     ):
    """Download several indicators over a range of years.

    Parameters
    ----------
    start_year : int
        First year to request.
    end_year : int or None
        Last year to request (inclusive). None means the current year,
        determined at call time.
    themes : dict or None
        Mapping ``{theme: [subtheme, ...]}`` of the indicators to fetch.
        None selects the predefined set (population structure
        ``bevstruk``, ``bevheim`` and ``bau_lwg``).
    geographical_levels : str or iterable of str
        ``'all'`` for every supported level, otherwise one level or several
        levels, each given as code (e.g. ``'wvi'``) or readable name (e.g.
        ``'viertel'``).
    error_value, replace_value
        Handed to ``get_basler_atlas_data``: values at or below
        ``error_value`` are replaced by ``replace_value``.
    testing : bool
        If True stop after the first year of the first subtheme of the
        first theme of the first level.
    quiet : bool
        If False, print progress information.

    Returns
    -------
    dict
        One entry per level/theme/subtheme, keyed
        ``'<nicename>_<theme>_<subtheme>'``. Each entry is a dict with the
        keys ``'data'`` (rows starting with reference name and number,
        followed by the yearly values), ``'header'`` (column names) and
        ``'meta'`` (theme, subtheme, nivgeo and topic information).

    Raises
    ------
    KeyError
        If a requested geographical level is unknown.
    """
    if end_year is None:
        # resolved per call, a default in the signature would be frozen at
        # import time
        end_year = datetime.date.today().year

    if themes is None:
        # population has different ages,
        # men m , women w and a grand total
        themes = {'bevstruk': ['alter0',
                               'alter20',
                               'alter65',
                               'w',
                               'm',
                               'gesbev'],
                  'bevheim': ['anteil_ch',
                              'anteil_bs',
                              'anteil_al',
                              'anteil_bsanch',
                              # "gesbev" has been replaced by
                              'gesbev_f',
                              ],
                  'bau_lwg': ['anzahl_lwg',
                              ],
                  }

    # the defaults that we know of - there is wahlbezirke too on the
    # atlas but we don't particularly care about that one...
    known_nivgeos = _get_nivgeos()
    if geographical_levels == 'all':
        nivgeos = known_nivgeos
    else:
        if isinstance(geographical_levels, str):
            geographical_levels = [geographical_levels]
        nice2nivgeo = _nicename2nivgeo()
        nivgeos = []
        for level in geographical_levels:
            if level in known_nivgeos:
                nivgeos.append(level)
            elif level in nice2nivgeo:
                nivgeos.append(nice2nivgeo[level])
            else:
                raise KeyError(f'unknown geographical level {level!r}')

    # mapping of information from topic url to meta information entries
    info2meta = {'url': 'c_url_indicateur',
                 'name': 'c_lib_indicateur',
                 'short_name': 'c_lib_indicateur_court',
                 'unit': 'c_unite',
                 'source': 'c_source',
                 'description': 'c_desc_indicateur',
                 }

    # the topic information does not depend on the geographical level, so
    # it is downloaded only once per theme/subtheme
    topic_cache = {}
    data = {}

    for nivgeo in nivgeos:
        # ids of the nivgeo is in refdata['codgeo']
        # names of the nivgeo is in refdata['libgeo']
        refname, refnumber = get_ref_data(nivgeo)
        refdata = [[name, number]
                   for name, number in zip(refname, refnumber)]
        nicename = _nivgeo2nicename(nivgeo)

        for theme in themes:
            if not quiet:
                print(f'Working on {theme} for {nicename}')

            for subtheme in themes[theme]:
                if not quiet:
                    print(f'Working on {theme}.{subtheme} for ',
                          f'{nicename}')

                # force a copy of refdata otherwise we keep updating the
                # old list of lists.
                container = {'data': [row.copy() for row in refdata],
                             'meta': {'theme': theme,
                                      'nivgeo': nivgeo,
                                      'subtheme': subtheme, },
                             'header': ['referencename', 'referencenumber'],
                             }

                topic = (theme, subtheme)
                if topic not in topic_cache:
                    topics = get_basler_atlas_topics(theme=theme,
                                                     subtheme=subtheme,
                                                     fullinfo=True)
                    topic_cache[topic] = topics[theme][subtheme]
                topicinfo = topic_cache[topic]

                for key, value in info2meta.items():
                    container['meta'][key] = topicinfo[value]

                # values will be nested, adjust header line with extra
                for year in range(start_year, end_year + 1):
                    if not quiet:
                        print(f'Getting data for {year} for {theme}',
                              f'{subtheme} for {nicename}')

                    keys, thisdata = get_basler_atlas_data(
                        nivgeo,
                        theme,
                        subtheme,
                        year,
                        quiet=quiet,
                        error_value=error_value,
                        replace_value=replace_value,
                    )

                    if thisdata is None:
                        if not quiet:
                            print(f'Failure to get data for {year} for {theme}',
                                  f'{subtheme} for {nicename}')
                        # keep the columns aligned with an empty entry
                        thisdata = [''] * len(container['data'])

                    if keys is None:
                        container['header'] += [f'{year}']
                    else:
                        container['header'] += [f'{year}_{key}'
                                                for key in keys]

                    # NOTE(review): assumes the values come in the same
                    # order and number as the reference data - confirm
                    for i, value in enumerate(thisdata):
                        if not isinstance(value, list):
                            value = [value]
                        container['data'][i] += value

                    if testing:
                        break  # year

                data[nicename + '_' + theme + '_' + subtheme] = container

                if testing:
                    break  # specific theme
            if testing:
                break  # theme
        if testing:
            break  # nivgeo

    return data
def get_basler_atlas_topics(theme=None,
                            subtheme=None,
                            fullinfo=False,
                            timeout=60):
    """Download the list of available indicators (topics).

    Parameters
    ----------
    theme : str or None
        If given, sent as the ``theme`` query parameter.
    subtheme : str, list of str or None
        If given, only indicators whose id is listed are kept. Themes of
        the response still get an (empty) entry when none of their
        indicators match.
    fullinfo : bool
        If True store the complete record of each indicator, otherwise
        only its id.
    timeout : float or None
        Timeout in seconds handed to ``requests.get``. Without a timeout a
        stalled connection would block forever; pass None to wait
        indefinitely.

    Returns
    -------
    dict
        ``{theme: {subtheme: record}}`` if ``fullinfo`` is True, else
        ``{theme: [subtheme, ...]}``.
    """
    import requests

    if isinstance(subtheme, str):
        subtheme = [subtheme]

    payload = {"tree": "A01", 'lang': 'de'}
    if theme is not None:
        payload['theme'] = theme

    response = requests.get(_get_topic_url(),
                            params=payload,
                            headers=headers,
                            timeout=timeout)
    topiclist = response.json()['content']['indics']

    data = {}
    for topicinfo in topiclist:
        maintopic = topicinfo['c_id_dataset']
        subtopic = topicinfo['c_id_indicateur']

        # the theme is registered even if the indicator is filtered out
        collected = data.setdefault(maintopic, {} if fullinfo else [])

        if subtheme is not None and subtopic not in subtheme:
            continue

        if fullinfo:
            collected[subtopic] = topicinfo
        else:
            collected.append(subtopic)

    return data
if __name__ == '__main__':
    # Running the module directly only prints usage examples, nothing is
    # downloaded or written.
    print("""
    Some example usages:

    # get some information for the population for the years 2000 to 2003
    data = get_basler_atlas(end_year=2003,
                            start_year=2000,
                            themes={'bevheim': ['anteil_ch','anteil_bs', ]},
                            geographical_levels='wvi',
                            quiet=True)

    # just get everything that is a predefined topic:
    themes={'bevstruk': ['alter0',
                         'alter20',
                         'alter65',
                         'w',
                         'm',
                         'gesbev'],
            'bevheim': ['anteil_ch',
                        'anteil_bs',
                        'anteil_al',
                        'anteil_bsanch',
                        # "gesbev" has been replaced by
                        'gesbev_f',
                        ],
            'bau_lwg': ['anzahl_lwg',
                        ],
            }

    # limit just by years
    data = get_basler_atlas(themes=themes, start_year=2000, end_year=2010)

    # also save the data to csv files (by theme and subtheme)
    data2csv(data)

    # get some information regarding the available topics
    themes = get_basler_atlas_topics()
    """)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment