Last active
February 22, 2021 22:50
-
-
Save 7yl4r/1f9c16e0c745f793909ac7e986e1a2de to your computer and use it in GitHub Desktop.
Creates an OBJECTID and Sample_ID for "child" rows that don't have them using "parent" rows as lookup by lat+lon+date.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Creates an OBJECTID and Sample_ID for "child" rows that don't have them, using "parent" rows as a lookup by lat+lon+date.
# """
# Child rows are identified by OBJECTID == 0.
# Pseudocode:
#   read in the OBJECTID != 0 rows as a lat/lon/date -> IDs map, plus a per-parent counter starting at 1
#   for each row with OBJECTID == 0:
#     OBJECTID = look up lat / lon / date in our map
# """
#
# NOTE: this doesn't work. We gave up after finding that the subsample rows were not actually subsamples.
library(readr)
library(lubridate)

# Convert an ID column to character without scientific notation, so that a
# numeric ID such as 100000 becomes "100000" rather than "1e+05". Columns that
# are not numeric are passed through as.character() unchanged. NA stays NA.
ids_as_character <- function(ids) {
  if (!is.numeric(ids)) {
    return(as.character(ids))
  }
  out <- format(
    ids,
    scientific = FALSE, trim = TRUE, digits = 15, drop0trailing = TRUE
  )
  out[is.na(ids)] <- NA_character_
  out
}

HABS_FFW_HABSOS_KK <- read_csv("~/OBIS/NOAA/HABSOS/HABS_FFW_HABSOS_KK.csv")

# convert date string to date object
HABS_FFW_HABSOS_KK$eventDate <- mdy(HABS_FFW_HABSOS_KK$Sample_Dat)
# TODO: time is in a separate column, leaving out for now to speed up processing

# Classify rows once, up front. Child rows have OBJECTID == 0, parent rows have
# any other non-missing OBJECTID. Rows with a missing OBJECTID are neither, so
# no comparison below can produce NA inside an `if`.
has_object_id <- !is.na(HABS_FFW_HABSOS_KK$OBJECTID)
is_child <- has_object_id & HABS_FFW_HABSOS_KK$OBJECTID == 0
is_parent <- has_object_id & HABS_FFW_HABSOS_KK$OBJECTID != 0

# Child IDs are built as "<parent id>_<n>", which is text. Make both ID columns
# character before writing into them; assigning a string into a numeric column
# is a type mismatch.
HABS_FFW_HABSOS_KK$OBJECTID <- ids_as_character(HABS_FFW_HABSOS_KK$OBJECTID)
HABS_FFW_HABSOS_KK$Sample_ID <- ids_as_character(HABS_FFW_HABSOS_KK$Sample_ID)

# read in & create lookup table (parent rows only)
lookup_table_df <- HABS_FFW_HABSOS_KK[
  is_parent,
  c("Lat", "Lon", "eventDate", "OBJECTID", "Sample_ID")
]
# suffix to hand out to the next child matched to each parent
lookup_table_df$subsample_count <- rep(1, nrow(lookup_table_df))

# assign child row IDs using lookup table
subsample_count <- 0       # children that were matched to a parent
mystery_sample_count <- 0  # children with no parent at the same lat/lon/date
for (i in which(is_child)) {
  # which() drops NA comparisons, so a child with a missing lat, lon or date
  # simply matches nothing and is counted as a mystery sample.
  parent_i <- which(
    lookup_table_df$Lat == HABS_FFW_HABSOS_KK$Lat[i] &
      lookup_table_df$Lon == HABS_FFW_HABSOS_KK$Lon[i] &
      lookup_table_df$eventDate == HABS_FFW_HABSOS_KK$eventDate[i]
  )

  if (length(parent_i) == 0) { # if no matching parent found
    # use uuid
    # skip
    mystery_sample_count <- mystery_sample_count + 1
    next
  }

  # Several parents can share lat/lon/date. Use the first one so that exactly
  # one ID is produced for this child.
  parent_i <- parent_i[1]
  suffix <- lookup_table_df$subsample_count[parent_i]

  # Rows pulled out of a data frame are copies, so write straight into the
  # data frames by index for the changes to persist.
  HABS_FFW_HABSOS_KK$OBJECTID[i] <- paste(
    lookup_table_df$OBJECTID[parent_i], suffix,
    sep = "_"
  )
  HABS_FFW_HABSOS_KK$Sample_ID[i] <- paste(
    lookup_table_df$Sample_ID[parent_i], suffix,
    sep = "_"
  )
  lookup_table_df$subsample_count[parent_i] <- suffix + 1
  subsample_count <- subsample_count + 1
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment