ThomasG77/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Récupération des stations quotidiennes depuis les données Météo France

import json
import urllib.request
from glob import glob
import pandas as pd
import geopandas

# Dataset to query through the data.gouv.fr API.
dataset_id = '6569b51ae64326786e4e8e1a'
url = f'https://www.data.gouv.fr/api/1/datasets/{dataset_id}/'

# Download the dataset description (JSON); it lists every resource of the dataset.
with urllib.request.urlopen(url) as resp:
    json_content = json.loads(resp.read())

# Keep the URLs that contain 'RR-T-Vent', leaving out resources typed as documentation.
urls = [
    link
    for link, kind in ((entry.get('url'), entry.get('type')) for entry in json_content.get('resources'))
    if 'RR-T-Vent' in link and kind != 'documentation'
]

# Group the resource URLs by the second '_'-separated token of their file name
# (presumably the department code -- TODO confirm against the resource naming).
mydict = {}
for url in urls:
    dep = url.split('/')[-1].split('_')[1]
    mydict.setdefault(dep, []).append(url)

# For every department, write 'stations-RR-T-Vent-dep-<dep>.csv' with one row per
# station (NUM_POSTE): name, coordinates, altitude and the first/last date found
# in the downloaded files.
# NOTE(review): NUM_POSTE is parsed with pandas' default dtype inference; if the
# identifiers can start with '0' the leading zero would be lost -- confirm.
station_columns = ['NUM_POSTE', 'NOM_USUEL', 'LAT', 'LON', 'ALTI', 'AAAAMMJJ']
for dep, values in mydict.items():
    # usecols: only these six columns are used below, so the others are not loaded.
    frames = [
        pd.read_csv(csv_url, compression='gzip', sep=';', quotechar='"', usecols=station_columns)
        for csv_url in values
    ]
    # ignore_index gives the merged frame a unique 0..n-1 index.
    df = pd.concat(frames, ignore_index=True)
    # .copy(): the columns added below must go to an independent frame, not to a
    # selection of df (this is what triggers pandas' SettingWithCopyWarning).
    stations = df[station_columns].copy()
    stations['AAAAMMJJ'] = pd.to_datetime(stations['AAAAMMJJ'], format='%Y%m%d')
    # First and last date per station, repeated on each of the station's rows.
    dates_by_station = stations.groupby('NUM_POSTE')['AAAAMMJJ']
    stations['MIN_DATE'] = dates_by_station.transform('min')
    stations['MAX_DATE'] = dates_by_station.transform('max')
    stations = stations.drop(columns=['AAAAMMJJ'])
    # Keep the first row met for each station.
    stations.drop_duplicates('NUM_POSTE').to_csv(f'stations-RR-T-Vent-dep-{dep}.csv', index=False)

# Merge the per-department station CSVs into one point layer and write it as GeoJSON.
# NOTE(review): the pattern matches every such CSV in the working directory,
# including files left by an earlier run -- confirm this is intended.
# sorted(): glob() returns matches in arbitrary order; sorting makes the order of
# the rows in the output reproducible.
files_stations_rr_t_vent = sorted(glob('stations-RR-T-Vent-dep-*.csv'))
frames_stations_rr_t_vent = [pd.read_csv(input_file) for input_file in files_stations_rr_t_vent]
# ignore_index: give the merged frame a unique 0..n-1 index instead of repeating
# each file's own row numbers.
df_stations_rr_t_vent = pd.concat(frames_stations_rr_t_vent, ignore_index=True)
# Points are built from the LON / LAT columns and declared as EPSG:4326.
gdf_stations_rr_t_vent = geopandas.GeoDataFrame(
    df_stations_rr_t_vent,
    geometry=geopandas.points_from_xy(df_stations_rr_t_vent.LON, df_stations_rr_t_vent.LAT),
    crs="EPSG:4326",
)
gdf_stations_rr_t_vent.to_file('stations_rr_t_vent.geojson', driver='GeoJSON')

  
## get-stations-daily-climatology-infos.py
import json
import urllib.request
from glob import glob
import pandas as pd
import geopandas

# Dataset to query through the data.gouv.fr API.
dataset_id = '6569b51ae64326786e4e8e1a'
url = f'https://www.data.gouv.fr/api/1/datasets/{dataset_id}/'

# Download the dataset description (JSON); it lists every resource of the dataset.
with urllib.request.urlopen(url) as resp:
    json_content = json.loads(resp.read())

# Keep the URLs that contain 'RR-T-Vent', leaving out resources typed as documentation.
urls = [
    link
    for link, kind in ((entry.get('url'), entry.get('type')) for entry in json_content.get('resources'))
    if 'RR-T-Vent' in link and kind != 'documentation'
]

# Group the resource URLs by the second '_'-separated token of their file name
# (presumably the department code -- TODO confirm against the resource naming).
mydict = {}
for url in urls:
    dep = url.split('/')[-1].split('_')[1]
    mydict.setdefault(dep, []).append(url)

# For every department, write 'stations-RR-T-Vent-dep-<dep>.csv' with one row per
# station (NUM_POSTE): name, coordinates, altitude and the first/last date found
# in the downloaded files.
# NOTE(review): NUM_POSTE is parsed with pandas' default dtype inference; if the
# identifiers can start with '0' the leading zero would be lost -- confirm.
station_columns = ['NUM_POSTE', 'NOM_USUEL', 'LAT', 'LON', 'ALTI', 'AAAAMMJJ']
for dep, values in mydict.items():
    # usecols: only these six columns are used below, so the others are not loaded.
    frames = [
        pd.read_csv(csv_url, compression='gzip', sep=';', quotechar='"', usecols=station_columns)
        for csv_url in values
    ]
    # ignore_index gives the merged frame a unique 0..n-1 index.
    df = pd.concat(frames, ignore_index=True)
    # .copy(): the columns added below must go to an independent frame, not to a
    # selection of df (this is what triggers pandas' SettingWithCopyWarning).
    stations = df[station_columns].copy()
    stations['AAAAMMJJ'] = pd.to_datetime(stations['AAAAMMJJ'], format='%Y%m%d')
    # First and last date per station, repeated on each of the station's rows.
    dates_by_station = stations.groupby('NUM_POSTE')['AAAAMMJJ']
    stations['MIN_DATE'] = dates_by_station.transform('min')
    stations['MAX_DATE'] = dates_by_station.transform('max')
    stations = stations.drop(columns=['AAAAMMJJ'])
    # Keep the first row met for each station.
    stations.drop_duplicates('NUM_POSTE').to_csv(f'stations-RR-T-Vent-dep-{dep}.csv', index=False)

# Merge the per-department station CSVs into one point layer and write it as GeoJSON.
# NOTE(review): the pattern matches every such CSV in the working directory,
# including files left by an earlier run -- confirm this is intended.
# sorted(): glob() returns matches in arbitrary order; sorting makes the order of
# the rows in the output reproducible.
files_stations_rr_t_vent = sorted(glob('stations-RR-T-Vent-dep-*.csv'))
frames_stations_rr_t_vent = [pd.read_csv(input_file) for input_file in files_stations_rr_t_vent]
# ignore_index: give the merged frame a unique 0..n-1 index instead of repeating
# each file's own row numbers.
df_stations_rr_t_vent = pd.concat(frames_stations_rr_t_vent, ignore_index=True)
# Points are built from the LON / LAT columns and declared as EPSG:4326.
gdf_stations_rr_t_vent = geopandas.GeoDataFrame(
    df_stations_rr_t_vent,
    geometry=geopandas.points_from_xy(df_stations_rr_t_vent.LON, df_stations_rr_t_vent.LAT),
    crs="EPSG:4326",
)
gdf_stations_rr_t_vent.to_file('stations_rr_t_vent.geojson', driver='GeoJSON')


## stations_rr_t_vent.geojson

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              stations_rr_t_vent.geojson
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	import json
	import urllib.request
	from glob import glob
	import pandas as pd
	import geopandas

	# Dataset to query through the data.gouv.fr API.
	dataset_id = '6569b51ae64326786e4e8e1a'
	url = f'https://www.data.gouv.fr/api/1/datasets/{dataset_id}/'

	# Download the dataset description (JSON); it lists every resource of the dataset.
	with urllib.request.urlopen(url) as resp:
		json_content = json.load(resp)

	# Keep the URLs that contain 'RR-T-Vent', leaving out resources typed as documentation.
	urls = [resource.get('url') for resource in json_content.get('resources') if 'RR-T-Vent' in resource.get('url') and resource.get('type') != 'documentation']

	# Group the URLs by the second '_'-separated token of their file name
	# (presumably the department code -- TODO confirm).
	mydict = {}
	for url, dep in [[url, url.split('/')[-1].split('_')[1]] for url in urls]:
		if dep not in mydict:
			mydict[dep] = []
		mydict[dep].append(url)

	# One CSV per department: one row per station (NUM_POSTE) with its first and
	# last date.
	for dep,values in mydict.items():
		frames = [pd.read_csv(url, compression='gzip', sep=';', quotechar='"') for url in values]
		df = pd.concat(frames)
		# .copy(): the columns added below must go to an independent frame, not to
		# a selection of df (this is what triggers pandas' SettingWithCopyWarning).
		stations = df[['NUM_POSTE', 'NOM_USUEL', 'LAT', 'LON', 'ALTI', 'AAAAMMJJ']].copy()
		stations['AAAAMMJJ'] = pd.to_datetime(stations['AAAAMMJJ'], format = '%Y%m%d')
		stations['MIN_DATE'] = stations.groupby(['NUM_POSTE'])['AAAAMMJJ'].transform('min')
		stations['MAX_DATE'] = stations.groupby(['NUM_POSTE'])['AAAAMMJJ'].transform('max')
		stations.drop(columns=['AAAAMMJJ'], inplace=True)
		stations.reset_index().drop_duplicates('NUM_POSTE').drop(columns=['index']).to_csv(f'stations-RR-T-Vent-dep-{dep}.csv', index=False)

	# Merge the per-department CSVs into one point layer (LON / LAT, EPSG:4326)
	# and write it as GeoJSON.
	files_stations_rr_t_vent = glob('stations-RR-T-Vent-dep-*.csv')
	frames_stations_rr_t_vent = [pd.read_csv(input_file) for input_file in files_stations_rr_t_vent]
	df_stations_rr_t_vent = pd.concat(frames_stations_rr_t_vent)
	gdf_stations_rr_t_vent = geopandas.GeoDataFrame(
		df_stations_rr_t_vent, geometry=geopandas.points_from_xy(df_stations_rr_t_vent.LON, df_stations_rr_t_vent.LAT), crs="EPSG:4326"
	)
	gdf_stations_rr_t_vent.to_file('stations_rr_t_vent.geojson', driver='GeoJSON')