Skip to content

Instantly share code, notes, and snippets.

@7yl4r
Last active February 22, 2021 22:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 7yl4r/1f9c16e0c745f793909ac7e986e1a2de to your computer and use it in GitHub Desktop.
Save 7yl4r/1f9c16e0c745f793909ac7e986e1a2de to your computer and use it in GitHub Desktop.
Creates an OBJECTID and Sample_ID for "child" rows that don't have them using "parent" rows as lookup by lat+lon+date.
# Creates an OBJECTID and Sample_ID for "child" rows that don't have them using "parent" rows as lookup by lat+lon+date.
# """
# Child rows are identified by OBJECTID == 0.
# Pseudocode:
# read rows with OBJECTID != 0 into a lat/lon/date -> IDs map, plus a per-parent increment starting at 1
# for each row with OBJECTID == 0:
# OBJECTID = lookup lat / lon / date in our map
# """
#
# NOTE: this doesn't work. We gave up after finding that the subsample rows were not actually subsamples.
library(readr)
library(lubridate)
# Load the HABSOS export from disk.
HABS_FFW_HABSOS_KK <- read_csv("~/OBIS/NOAA/HABSOS/HABS_FFW_HABSOS_KK.csv")

# Parse the Sample_Dat strings (month/day/year order) into an eventDate column.
# TODO: the time of day lives in a separate column; it is left out for now to
# speed up processing.
HABS_FFW_HABSOS_KK[["eventDate"]] <- mdy(HABS_FFW_HABSOS_KK$Sample_Dat)

# Build the parent lookup table: the match keys (Lat, Lon, eventDate) and the
# two ID columns for every row whose OBJECTID is non-zero. which() also drops
# rows where the comparison is NA.
lookup_table_df <- HABS_FFW_HABSOS_KK[
  which(HABS_FFW_HABSOS_KK$OBJECTID != 0),
  c("Lat", "Lon", "eventDate", "OBJECTID", "Sample_ID")
]

# Per-parent counter holding the suffix the next matching child row will get.
lookup_table_df$subsample_count <- 1
# Assign OBJECTID and Sample_ID to child rows (OBJECTID == 0) using the lookup
# table. A child is matched to the parent row that has the same Lat, Lon and
# eventDate and receives "<parent id>_<n>", where n is that parent's running
# subsample counter (starting at 1). Children without a matching parent are
# left unchanged and only counted.
subsample_count <- 0       # child rows that received derived IDs
mystery_sample_count <- 0  # child rows with no matching parent

# Locate child rows before the ID columns are converted to text. which()
# ignores NA comparisons, so a missing OBJECTID is skipped instead of making
# an `if (NA)` abort the loop; an empty table simply yields no iterations.
child_rows <- which(HABS_FFW_HABSOS_KK$OBJECTID == 0)

# Derived IDs such as "123_1" are text, so they cannot be stored in a numeric
# ID column. Work on character copies of both ID columns and write them back
# once after the loop. R copies on modify, so edits made to an extracted row
# or vector never reach the source table until they are assigned back.
object_ids <- as.character(HABS_FFW_HABSOS_KK$OBJECTID)
sample_ids <- as.character(HABS_FFW_HABSOS_KK$Sample_ID)
next_suffix <- lookup_table_df$subsample_count

for (i in child_rows) {
  # NOTE(review): coordinates are compared with exact `==`; this assumes parent
  # and child values come from identical text in the same file -- confirm if
  # the coordinates are ever rounded or recomputed upstream.
  parent_i <- which(
    lookup_table_df$Lat == HABS_FFW_HABSOS_KK$Lat[i] &
      lookup_table_df$Lon == HABS_FFW_HABSOS_KK$Lon[i] &
      lookup_table_df$eventDate == HABS_FFW_HABSOS_KK$eventDate[i]
  )

  if (length(parent_i) == 0) {
    # No matching parent found: skip the row (a UUID could be assigned here).
    mystery_sample_count <- mystery_sample_count + 1
    next
  }

  # Several parents can share the same lat/lon/date. Use the first one so that
  # exactly one ID is produced for this single child row.
  parent_i <- parent_i[1]
  suffix <- next_suffix[parent_i]

  object_ids[i] <- paste(lookup_table_df$OBJECTID[parent_i], suffix, sep = "_")
  sample_ids[i] <- paste(lookup_table_df$Sample_ID[parent_i], suffix, sep = "_")

  next_suffix[parent_i] <- suffix + 1
  subsample_count <- subsample_count + 1
}

# Write the results back to the tables in one step each.
HABS_FFW_HABSOS_KK$OBJECTID <- object_ids
HABS_FFW_HABSOS_KK$Sample_ID <- sample_ids
lookup_table_df$subsample_count <- next_suffix
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment