Created
August 18, 2021 21:37
-
-
Save d-bohn/6461ce0d68ef2e99dea05c056398a04e to your computer and use it in GitHub Desktop.
Quick Fix pyMturkR::GetAssignments()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
get_all_assignments2 <- | |
function(assignment = NULL, | |
hit = NULL, | |
hit.type = NULL, | |
annotation = NULL, | |
status = NULL, | |
results = as.integer(100), | |
pagetoken = NULL, | |
get.answers = FALSE, | |
persist.on.error = FALSE, | |
verbose = getOption('pyMTurkR.verbose', TRUE)) { | |
GetClient() # Boto3 client | |
if (as.numeric(results) < 1 || as.numeric(results) > 100) { | |
stop("'pagesize' must be in range (1 to 100)") | |
} | |
# Check that one of the params for lookup was provided | |
if (all(is.null(assignment) & is.null(hit) & | |
is.null(hit.type) & is.null(annotation))) { | |
stop("Must provide 'assignment' xor 'hit' xor 'hit.type' xor 'annotation'") | |
} else if (!is.null(assignment)) { | |
# Lookup by assignments | |
# For each assignment... | |
for (i in 1:length(assignment)) { | |
response <- | |
try(pyMTurkR$Client$get_assignment(AssignmentId = assignment[i]), | |
silent = !verbose) | |
QualificationRequirements <- list() | |
if (class(response) != "try-error") { | |
tmp <- ToDataFrameAssignment(response$Assignment) | |
a <- tmp$assignments | |
ans <- tmp$answers | |
if (i == 1) { | |
Assignments <- a | |
Answers <- ans | |
} else { | |
Assignments <- rbind(Assignments, a) | |
Answers <- rbind(Answers, ans) | |
} | |
if (verbose) { | |
message("Assignment ", assignment[i], " Retrieved") | |
} | |
} | |
} | |
} else { | |
# Search for HITs that match criteria given | |
if (!is.null(hit)) { | |
# First we need to get a list of HITs | |
if (is.factor(hit)) { | |
hit <- as.character(hit) | |
} | |
hitlist <- hit | |
} else if (!is.null(hit.type)) { | |
# Search by HIT Type | |
if (is.factor(hit.type)) { | |
hit.type <- as.character(hit.type) | |
} | |
hitsearch <- SearchHITs(verbose = FALSE) | |
hitlist <- | |
hitsearch$HITs$HITId[hitsearch$HITs$HITTypeId %in% hit.type] | |
if (length(hitlist) == 0) { | |
stop("No HITs found matching HITTypeId") | |
} | |
} else if (!is.null(annotation)) { | |
# Search by HIT Annotation | |
if (is.na(annotation)) { | |
stop("Annotation is NA") | |
} | |
if (is.factor(annotation)) { | |
annotation <- as.character(annotation) | |
} | |
hitsearch <- SearchHITs(verbose = FALSE) | |
hitlist <- | |
hitsearch$HITs$HITId[grepl(annotation, hitsearch$HITs$RequesterAnnotation)] | |
if (length(hitlist) == 0) { | |
stop("No HITs found matching Annotation") | |
} | |
} | |
if (length(hitlist) == 0) { | |
stop("No HITs found for HITType") | |
} | |
if (!is.null(status)) { | |
if (!all(status %in% c("Approved", "Rejected", "Submitted"))) { | |
stop( | |
"Status must be vector containing one or more of: 'Approved', 'Rejected', 'Submitted'" | |
) | |
} | |
} else { | |
status <- c("Approved", "Rejected", "Submitted") | |
} | |
batch_helper_list_assignments <- | |
function(batchhit, | |
pagetoken = NULL, | |
num_retries = 1) { | |
if (!is.null(pagetoken)) { | |
# Use page token if given | |
response <- | |
try(pyMTurkR$Client$list_assignments_for_hit( | |
HITId = batchhit, | |
NextToken = pagetoken, | |
MaxResults = as.integer(results), | |
AssignmentStatuses = as.list(status) | |
), | |
silent = !verbose) | |
} else { | |
response <- | |
try(pyMTurkR$Client$list_assignments_for_hit( | |
HITId = batchhit, | |
MaxResults = as.integer(results), | |
AssignmentStatuses = as.list(status) | |
), | |
silent = !verbose) | |
} | |
# Validity check response | |
if (class(response) == "try-error" & persist.on.error) { | |
# If the response was an error, then we should try again | |
# but stop after 5 attempts | |
message(" Error. Trying again. Attempt #", | |
num_retries, | |
" for HIT: ", | |
batchhit) | |
message(" Waiting a few seconds before retrying...") | |
Sys.sleep(5) | |
num_retries <- num_retries + 1 | |
response <- | |
batch_helper_list_assignments( | |
batchhit = batchhit, | |
pagetoken = pagetoken, | |
num_retries = num_retries | |
) | |
if (num_retries >= 5) { | |
stop( | |
paste0( | |
"Failed after 5 attempts to fetch list of assignments for HIT: ", | |
batchhit | |
) | |
) | |
} | |
} else { | |
return(response) | |
} | |
} | |
# Batch process function | |
batch <- function(batchhit, pagetoken = NULL) { | |
response <- | |
batch_helper_list_assignments(batchhit = batchhit, pagetoken = pagetoken) | |
assignments <- response$Assignments | |
tmpAssignments <- NULL | |
tmpAnswers <- NULL | |
# For each assignment... | |
if (length(assignments) > 0) { | |
for (i in 1:length(assignments)) { | |
tmp <- ToDataFrameAssignment(assignments[[i]]) | |
tmpAssignments <- rbind(tmpAssignments, tmp$assignments) | |
tmpAnswers <- rbind(tmpAnswers, tmp$answers) | |
if (verbose) { | |
message("Assignment ", | |
assignments[[i]]$AssignmentId, | |
" Retrieved") | |
} | |
} | |
} else { | |
return(NULL) | |
} | |
# Update page token | |
if (!is.null(response$NextToken)) { | |
pagetoken <- response$NextToken | |
} | |
# Update page token | |
if (!is.null(response$NumResults)) { | |
numresults <- response$NumResults | |
} else { | |
numresults <- 0 | |
} | |
return( | |
list( | |
Assignments = tmpAssignments, | |
Answers = tmpAnswers, | |
NumResults = numresults, | |
NextToken = pagetoken | |
) | |
) | |
} | |
# Keep a running total of all Assignments fetched | |
runningtotal <- 0 | |
Assignments <- | |
emptydf( | |
nrow = 0, | |
ncol = 11, | |
c( | |
'AssignmentId', | |
'WorkerId', | |
'HITId', | |
'AssignmentStatus', | |
'AutoApprovalTime', | |
'AcceptTime', | |
'SubmitTime', | |
'ApprovalTime', | |
'RejectionTime', | |
'RequesterFeedback', | |
'Answer' | |
) | |
) | |
Answers <- | |
emptydf( | |
0, | |
9, | |
c( | |
"AssignmentId", | |
"WorkerId", | |
"HITId", | |
"QuestionIdentifier", | |
"FreeText", | |
"SelectionIdentifier", | |
"OtherSelectionField", | |
"UploadedFileKey", | |
"UploadedFileSizeInBytes" | |
) | |
) | |
total.results.found <- 0 | |
# Run batch over hitlist | |
for (i in 1:length(hitlist)) { | |
hit <- hitlist[i] | |
pagetoken <- NULL | |
# Fetch first page | |
response <- batch(hit, pagetoken) | |
if (!is.null(response)) { | |
results.found <- response$NumResults | |
} else { | |
results.found <- 0 | |
} | |
total.results.found <- total.results.found + results.found | |
to.return <- response | |
pagetoken <- response$NextToken | |
Assignments <- rbind(Assignments, to.return$Assignments) | |
Answers <- rbind(Answers, to.return$Answers) | |
# if (total.results.found >= results) { | |
# break | |
# } | |
while (!is.null(response$NextToken) # & | |
# results > total.results.found | |
) { | |
response <- batch(hit, pagetoken) | |
results.found <- response$NumResults | |
to.return <- response | |
# Update if response found results | |
if (!is.null(response)) { | |
pagetoken <- response$NextToken | |
} else { | |
results.found <- 0 | |
} | |
total.results.found <- total.results.found + results.found | |
Assignments <- rbind(Assignments, to.return$Assignments) | |
Answers <- rbind(Answers, to.return$Answers) | |
} | |
} | |
} | |
if (verbose) { | |
message("\n", nrow(Assignments), " Assignments Retrieved") | |
} | |
Assignments$Answer <- NULL | |
if (get.answers == TRUE) { | |
return(list(Assignments = Assignments, Answers = Answers)) | |
} else { | |
return(Assignments) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment