Skip to content

Instantly share code, notes, and snippets.

@anderzzz
Created June 8, 2023 13:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anderzzz/8bdde0e8170078598a44cf9715be36fe to your computer and use it in GitHub Desktop.
Save anderzzz/8bdde0e8170078598a44cf9715be36fe to your computer and use it in GitHub Desktop.
import os
import pandas as pd
# Maps the label that starts a metadata header line (including the leading
# '# ' and the trailing ':') to the column name used for that value in the
# resulting table.
metadata_labels = {
    '# River:': 'river',
    '# Station:': 'station',
    '# Latitude (DD):': 'latitude',
    '# Longitude (DD):': 'longitude',
    # The unit in this label contains a non-ASCII superscript two, so the
    # text seen at run time depends on how the file was decoded. All known
    # spellings map to the same column:
    #   'km²' - the character decoded correctly
    #   'km'  - the character dropped (e.g. by open(..., errors='ignore'))
    #   'km≤' - original key; appears to be '²' (byte 0xB2) mis-decoded as
    #           Mac Roman. Kept so existing behavior is unchanged.
    '# Catchment area (km²):': 'catchment_area',
    '# Catchment area (km):': 'catchment_area',
    '# Catchment area (km≤):': 'catchment_area',
    '# Altitude (m ASL):': 'altitude',
    '# Next downstream station:': 'next_downstream_station',
    '# Remarks:': 'remarks',
    '# Owner of original data:': 'owner_of_original_data',
    '# GRDC-No.:': 'grdc_number',
    '# Unit of measure:': 'unit_of_measure',
}
# Collect the header metadata of every file in `folder_path` into one table,
# one row per file, indexed by GRDC station number when that label was found.
metadata_list = []
folder_path = 'data'

# A tuple lets str.startswith() reject non-metadata lines in a single call
# instead of testing every label against every line.
label_prefixes = tuple(metadata_labels)

# Loop over files in the folder. os.listdir() returns entries in arbitrary
# order, so sort them to make the row order reproducible between runs.
for file_name in sorted(os.listdir(folder_path)):
    file_path = os.path.join(folder_path, file_name)
    if not os.path.isfile(file_path):
        # Sub-directories cannot be opened as text files; skip them.
        continue

    metadata = {}
    # errors='ignore' silently drops bytes that cannot be decoded with the
    # default encoding, so non-ASCII characters may be missing from values.
    with open(file_path, 'r', errors='ignore') as file:
        for line in file:  # iterate lazily instead of loading the whole file
            if not line.startswith(label_prefixes):
                continue
            for key, value in metadata_labels.items():
                if line.startswith(key):
                    # Slice off the label rather than splitting on ':' so
                    # that values which themselves contain a colon (URLs,
                    # times, free-text remarks) are kept whole.
                    metadata[value] = line[len(key):].strip()
    metadata_list.append(metadata)

# Create a DataFrame from the collected metadata
metadata_df = pd.DataFrame(metadata_list)
# The column is absent when the folder is empty or no file carried the
# GRDC number label; set_index() would raise KeyError in that case.
if 'grdc_number' in metadata_df.columns:
    metadata_df.set_index('grdc_number', inplace=True)

# Print the metadata DataFrame
print(metadata_df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment