Skip to content

Instantly share code, notes, and snippets.

@d-bohn
Created August 18, 2021 21:37
Show Gist options
  • Save d-bohn/6461ce0d68ef2e99dea05c056398a04e to your computer and use it in GitHub Desktop.
Save d-bohn/6461ce0d68ef2e99dea05c056398a04e to your computer and use it in GitHub Desktop.
Quick Fix pyMturkR::GetAssignments()
get_all_assignments2 <-
function(assignment = NULL,
hit = NULL,
hit.type = NULL,
annotation = NULL,
status = NULL,
results = as.integer(100),
pagetoken = NULL,
get.answers = FALSE,
persist.on.error = FALSE,
verbose = getOption('pyMTurkR.verbose', TRUE)) {
GetClient() # Boto3 client
if (as.numeric(results) < 1 || as.numeric(results) > 100) {
stop("'pagesize' must be in range (1 to 100)")
}
# Check that one of the params for lookup was provided
if (all(is.null(assignment) & is.null(hit) &
is.null(hit.type) & is.null(annotation))) {
stop("Must provide 'assignment' xor 'hit' xor 'hit.type' xor 'annotation'")
} else if (!is.null(assignment)) {
# Lookup by assignments
# For each assignment...
for (i in 1:length(assignment)) {
response <-
try(pyMTurkR$Client$get_assignment(AssignmentId = assignment[i]),
silent = !verbose)
QualificationRequirements <- list()
if (class(response) != "try-error") {
tmp <- ToDataFrameAssignment(response$Assignment)
a <- tmp$assignments
ans <- tmp$answers
if (i == 1) {
Assignments <- a
Answers <- ans
} else {
Assignments <- rbind(Assignments, a)
Answers <- rbind(Answers, ans)
}
if (verbose) {
message("Assignment ", assignment[i], " Retrieved")
}
}
}
} else {
# Search for HITs that match criteria given
if (!is.null(hit)) {
# First we need to get a list of HITs
if (is.factor(hit)) {
hit <- as.character(hit)
}
hitlist <- hit
} else if (!is.null(hit.type)) {
# Search by HIT Type
if (is.factor(hit.type)) {
hit.type <- as.character(hit.type)
}
hitsearch <- SearchHITs(verbose = FALSE)
hitlist <-
hitsearch$HITs$HITId[hitsearch$HITs$HITTypeId %in% hit.type]
if (length(hitlist) == 0) {
stop("No HITs found matching HITTypeId")
}
} else if (!is.null(annotation)) {
# Search by HIT Annotation
if (is.na(annotation)) {
stop("Annotation is NA")
}
if (is.factor(annotation)) {
annotation <- as.character(annotation)
}
hitsearch <- SearchHITs(verbose = FALSE)
hitlist <-
hitsearch$HITs$HITId[grepl(annotation, hitsearch$HITs$RequesterAnnotation)]
if (length(hitlist) == 0) {
stop("No HITs found matching Annotation")
}
}
if (length(hitlist) == 0) {
stop("No HITs found for HITType")
}
if (!is.null(status)) {
if (!all(status %in% c("Approved", "Rejected", "Submitted"))) {
stop(
"Status must be vector containing one or more of: 'Approved', 'Rejected', 'Submitted'"
)
}
} else {
status <- c("Approved", "Rejected", "Submitted")
}
batch_helper_list_assignments <-
function(batchhit,
pagetoken = NULL,
num_retries = 1) {
if (!is.null(pagetoken)) {
# Use page token if given
response <-
try(pyMTurkR$Client$list_assignments_for_hit(
HITId = batchhit,
NextToken = pagetoken,
MaxResults = as.integer(results),
AssignmentStatuses = as.list(status)
),
silent = !verbose)
} else {
response <-
try(pyMTurkR$Client$list_assignments_for_hit(
HITId = batchhit,
MaxResults = as.integer(results),
AssignmentStatuses = as.list(status)
),
silent = !verbose)
}
# Validity check response
if (class(response) == "try-error" & persist.on.error) {
# If the response was an error, then we should try again
# but stop after 5 attempts
message(" Error. Trying again. Attempt #",
num_retries,
" for HIT: ",
batchhit)
message(" Waiting a few seconds before retrying...")
Sys.sleep(5)
num_retries <- num_retries + 1
response <-
batch_helper_list_assignments(
batchhit = batchhit,
pagetoken = pagetoken,
num_retries = num_retries
)
if (num_retries >= 5) {
stop(
paste0(
"Failed after 5 attempts to fetch list of assignments for HIT: ",
batchhit
)
)
}
} else {
return(response)
}
}
# Batch process function
batch <- function(batchhit, pagetoken = NULL) {
response <-
batch_helper_list_assignments(batchhit = batchhit, pagetoken = pagetoken)
assignments <- response$Assignments
tmpAssignments <- NULL
tmpAnswers <- NULL
# For each assignment...
if (length(assignments) > 0) {
for (i in 1:length(assignments)) {
tmp <- ToDataFrameAssignment(assignments[[i]])
tmpAssignments <- rbind(tmpAssignments, tmp$assignments)
tmpAnswers <- rbind(tmpAnswers, tmp$answers)
if (verbose) {
message("Assignment ",
assignments[[i]]$AssignmentId,
" Retrieved")
}
}
} else {
return(NULL)
}
# Update page token
if (!is.null(response$NextToken)) {
pagetoken <- response$NextToken
}
# Update page token
if (!is.null(response$NumResults)) {
numresults <- response$NumResults
} else {
numresults <- 0
}
return(
list(
Assignments = tmpAssignments,
Answers = tmpAnswers,
NumResults = numresults,
NextToken = pagetoken
)
)
}
# Keep a running total of all Assignments fetched
runningtotal <- 0
Assignments <-
emptydf(
nrow = 0,
ncol = 11,
c(
'AssignmentId',
'WorkerId',
'HITId',
'AssignmentStatus',
'AutoApprovalTime',
'AcceptTime',
'SubmitTime',
'ApprovalTime',
'RejectionTime',
'RequesterFeedback',
'Answer'
)
)
Answers <-
emptydf(
0,
9,
c(
"AssignmentId",
"WorkerId",
"HITId",
"QuestionIdentifier",
"FreeText",
"SelectionIdentifier",
"OtherSelectionField",
"UploadedFileKey",
"UploadedFileSizeInBytes"
)
)
total.results.found <- 0
# Run batch over hitlist
for (i in 1:length(hitlist)) {
hit <- hitlist[i]
pagetoken <- NULL
# Fetch first page
response <- batch(hit, pagetoken)
if (!is.null(response)) {
results.found <- response$NumResults
} else {
results.found <- 0
}
total.results.found <- total.results.found + results.found
to.return <- response
pagetoken <- response$NextToken
Assignments <- rbind(Assignments, to.return$Assignments)
Answers <- rbind(Answers, to.return$Answers)
# if (total.results.found >= results) {
# break
# }
while (!is.null(response$NextToken) # &
# results > total.results.found
) {
response <- batch(hit, pagetoken)
results.found <- response$NumResults
to.return <- response
# Update if response found results
if (!is.null(response)) {
pagetoken <- response$NextToken
} else {
results.found <- 0
}
total.results.found <- total.results.found + results.found
Assignments <- rbind(Assignments, to.return$Assignments)
Answers <- rbind(Answers, to.return$Answers)
}
}
}
if (verbose) {
message("\n", nrow(Assignments), " Assignments Retrieved")
}
Assignments$Answer <- NULL
if (get.answers == TRUE) {
return(list(Assignments = Assignments, Answers = Answers))
} else {
return(Assignments)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment