Skip to content

Instantly share code, notes, and snippets.

@astrolitterbox
Created July 18, 2022 15:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save astrolitterbox/68b55fe05012ea349ea1ac5714654ba6 to your computer and use it in GitHub Desktop.
Save astrolitterbox/68b55fe05012ea349ea1ac5714654ba6 to your computer and use it in GitHub Desktop.
NOW DB cleanup
import pandas as pd
import numpy as np
# Load the tab-separated NOW export. Cells holding "\N", "n" or "N" are parsed
# as missing values; the two age columns are read as floats and LIDNUM as int.
taxa = pd.read_csv(
    "Raw_NOW_data/now_export_locsp_2022-04-27.csv",
    delimiter="\t",
    na_values=["\\N", "n", "N"],
    dtype={'MAX_AGE': 'float64', 'MIN_AGE': 'float64', 'LIDNUM': 'int'},
)

# Every missing cell, in every column, is replaced by the sentinel -999.
taxa = taxa.fillna(-999)
print(taxa.shape)  # table size before any rows are dropped

# (column, value) pairs: a row is rejected when the column equals the value.
# NOTE(review): the "\\N" entries (a literal backslash followed by N) look
# redundant, because read_csv above already parses that token as missing and
# fillna turns it into -999. They are kept so the filter matches the original
# checks one-for-one.
rejected_values = [
    # Genus missing or not a usable name.
    ('GENUS', -999),
    ('GENUS', "\\N"),
    ('GENUS', "indet."),
    ('GENUS', "Indet."),
    ('GENUS', "gen."),
    ('GENUS', "Gen."),
    ('GENUS', "incertae sedis"),
    ('GENUS', "nomen nudum"),
    # Family missing or not a usable name.
    ('FAMILY', -999),
    ('FAMILY', "\\N"),
    ('FAMILY', "indet."),
    ('FAMILY', "incertae sedis"),
    ('FAMILY', "incertaesedis"),
    ('FAMILY', "nomen nudum"),
    # Order of uncertain placement.
    ('ORDER', "incertae sedis"),
    # Remove piscivores.
    ('DIET_2', "piscivore"),
    # Missing coordinates.
    ('LAT', -999),
    ('LONG', -999),
    # Missing ages.
    ('MIN_AGE', -999),
    ('MIN_AGE', "\\N"),
    ('MAX_AGE', -999),
]

# Accumulate a single boolean mask of rows to discard.
not_ok = pd.Series(False, index=taxa.index)
for column, bad_value in rejected_values:
    not_ok = not_ok | (taxa[column] == bad_value)

# Age sanity checks: MAX_AGE above 100, MIN_AGE above 70, or the two ages
# summing to exactly zero.
not_ok = not_ok | (taxa['MAX_AGE'] > 100)
not_ok = not_ok | (taxa['MIN_AGE'] > 70)
not_ok = not_ok | (taxa['MAX_AGE'] + taxa['MIN_AGE'] == 0)

# Keep only the rows that tripped none of the checks.
taxa = taxa[~not_ok]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment