Skip to content

Instantly share code, notes, and snippets.

@jamm1985
Last active Jul 24, 2020
Embed
What would you like to do?
Aggregate (count) catalog data by year/month and plot it
"""
File: catalog.py
Author: Andrey Stepnov
Email: myjamm@gmail.com, a.stepnov@geophystech.ru
Github: https://github.com/jamm1985
Description: Aggregate (count) catalog data by year/month and plot it
Catalog sources: https://earthquake.usgs.gov, https://eqalert.ru
"""
import pandas as pd
import matplotlib as mlp
import pylab as plt
def parse_eqalert_ru(xlsx):
"""read xlsx, clean and save data to dataframe
:xlsx: string, input XLSX file
:returns: pandas dataframe
"""
data = pd.read_excel(xlsx)
data = data[['event_datetime',
'lat', 'lon', 'depth',
'mag', 'ep_dis', 'msk64_value', 'sta_num']]
data['event_datetime'] = pd.to_datetime(data['event_datetime'], format='%Y-%m-%d %H:%M:%S')
return data
def parse_usgs_gov(csv):
"""read csv file, clean and save the data to dataframe
:csv: string, input csv file
:returns: pandas dataframe
"""
data = pd.read_csv(csv)
data['time'] = pd.to_datetime(data['time'])
data = data[['time',
'latitude', 'longitude', 'depth',
'mag', 'dmin', 'nst']]
data.columns = ['event_datetime', 'lat', 'lon', 'depth', 'mag', 'ep_dis', 'sta_num']
return data
## USGS catalog
data = parse_usgs_gov('2020-07-11-usgs-query.csv')
## OR
## EQALERT catalog
data = parse_eqalert_ru('reports_list_09-04-10072020.xlsx')
## selection by date, MAG etc
data = data[data['mag'] > 4.0]
data = data[data['event_datetime'] > '1960']
## group by month and by year
by_month = pd.DataFrame(data['event_datetime'].dt.strftime('%Y-%m'))
by_month['mag'] = data['mag']
by_month['max_mag'] = by_month.groupby(['event_datetime'])['mag'].transform(max)
by_month = by_month.groupby(['event_datetime', 'max_mag']).size().reset_index().rename(columns={0: 'counts'})
by_year = pd.DataFrame(data['event_datetime'].dt.strftime('%Y'))
by_year['mag'] = data['mag']
by_year['max_mag'] = by_year.groupby(['event_datetime'])['mag'].transform(max)
by_year = by_year.groupby(['event_datetime', 'max_mag']).size().reset_index().rename(columns={0: 'counts'})
## plot
by_month.plot.line(x='event_datetime', y=['counts','max_mag'], subplots=True)
plt.show()
by_year.plot.line(x='event_datetime', y=['counts','max_mag'], subplots=True)
plt.show()
## save groupped data
by_month.to_excel("by_month.xlsx")
by_year.to_excel("by_year.xlsx")
@jamm1985

This comment has been minimized.

Copy link
Owner Author

@jamm1985 jamm1985 commented Jul 24, 2020

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment