Skip to content

Instantly share code, notes, and snippets.

@mikkohei13
Created November 23, 2023 21:13
Show Gist options
  • Save mikkohei13/901fa3eea5c1c3f75e2bed68b4872cda to your computer and use it in GitHub Desktop.
Save mikkohei13/901fa3eea5c1c3f75e2bed68b4872cda to your computer and use it in GitHub Desktop.
Find temporal outliers in FinBIF simple datafile
# Made with ChatGPT GPT-4 2023-11-23
# Finds temporal outliers from a simple occurrence data file of a single species from FinBIF, with headers in English.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from shapely.geometry import Point
import geopandas as gpd
# Read the tab-separated occurrence export into a DataFrame.
file_path = 'single_species_data.tsv'  # Replace with your file path
data = pd.read_csv(filepath_or_buffer=file_path, delimiter='\t')
# Function to find outliers
def find_outliers(df, column, *, whisker=1.5):
    """Return the rows of *df* whose *column* value is an IQR outlier.

    A value is an outlier when it lies strictly below
    ``Q1 - whisker * IQR`` or strictly above ``Q3 + whisker * IQR``,
    where Q1 and Q3 are the 25th and 75th percentiles of the column and
    ``IQR = Q3 - Q1``.

    Args:
        df: DataFrame holding the observations.
        column: Name of the numeric column to test.
        whisker: Multiplier applied to the IQR to set the fences.
            Defaults to 1.5.

    Returns:
        A DataFrame with the rows of *df* that fall outside the fences,
        keeping their original index labels. Rows with a missing value
        in *column* are never returned, because comparisons against
        NaN evaluate to False.
    """
    values = df[column]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - whisker * iqr
    upper_bound = q3 + whisker * iqr
    return df[(values < lower_bound) | (values > upper_bound)]
# Keep records whose life stage is either missing or 'adult'.
filtered_data = data.loc[
    data['Life stage'].isna() | data['Life stage'].eq('adult')
]

# Of those, keep only observations that begin and end on the same day of year.
filtered_data_same_day = filtered_data.loc[
    filtered_data['Begin day of year'] == filtered_data['End day of year']
]

# Run the IQR outlier check on both day-of-year columns.
begin_day_outliers_filtered = find_outliers(
    filtered_data_same_day, 'Begin day of year'
)
end_day_outliers_filtered = find_outliers(
    filtered_data_same_day, 'End day of year'
)

# Stack both outlier sets and reduce them to distinct observation identifiers.
combined_outliers = pd.concat(
    [begin_day_outliers_filtered, end_day_outliers_filtered]
)
outlier_identifiers_filtered = combined_outliers['Observation identifier'].unique()

print(outlier_identifiers_filtered.tolist())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment