d-bohn/get_assignments.R

## get_assignments.R
get_all_assignments2 <-
  function(assignment = NULL,
           hit = NULL,
           hit.type = NULL,
           annotation = NULL,
           status = NULL,
           results = as.integer(100),
           pagetoken = NULL,
           get.answers = FALSE,
           persist.on.error = FALSE,
           verbose = getOption('pyMTurkR.verbose', TRUE)) {
    GetClient() # Boto3 client

    if (as.numeric(results) < 1 || as.numeric(results) > 100) {
      stop("'pagesize' must be in range (1 to 100)")
    }

    # Check that one of the params for lookup was provided
    if (all(is.null(assignment) & is.null(hit) &
            is.null(hit.type) & is.null(annotation))) {
      stop("Must provide 'assignment' xor 'hit' xor 'hit.type' xor 'annotation'")

    } else if (!is.null(assignment)) {
      # Lookup by assignments

      # For each assignment...
      for (i in 1:length(assignment)) {
        response <-
          try(pyMTurkR$Client$get_assignment(AssignmentId = assignment[i]),
              silent = !verbose)
        QualificationRequirements <- list()

        if (class(response) != "try-error") {
          tmp <- ToDataFrameAssignment(response$Assignment)
          a <- tmp$assignments
          ans <- tmp$answers

          if (i == 1) {
            Assignments <- a
            Answers <- ans

          } else {
            Assignments <- rbind(Assignments, a)
            Answers <- rbind(Answers, ans)
          }

          if (verbose) {
            message("Assignment ", assignment[i], " Retrieved")
          }
        }
      }


    } else {
      # Search for HITs that match criteria given

      if (!is.null(hit)) {
        # First we need to get a list of HITs
        if (is.factor(hit)) {

          hit <- as.character(hit)
        }
        hitlist <- hit
      } else if (!is.null(hit.type)) {

        # Search by HIT Type
        if (is.factor(hit.type)) {
          hit.type <- as.character(hit.type)
        }

        hitsearch <- SearchHITs(verbose = FALSE)
        hitlist <-
          hitsearch$HITs$HITId[hitsearch$HITs$HITTypeId %in% hit.type]

        if (length(hitlist) == 0) {
          stop("No HITs found matching HITTypeId")
        }

      } else if (!is.null(annotation)) {

        # Search by HIT Annotation
        if (is.na(annotation)) {
          stop("Annotation is NA")
        }

        if (is.factor(annotation)) {
          annotation <- as.character(annotation)
        }

        hitsearch <- SearchHITs(verbose = FALSE)
        hitlist <-
          hitsearch$HITs$HITId[grepl(annotation, hitsearch$HITs$RequesterAnnotation)]

        if (length(hitlist) == 0) {
          stop("No HITs found matching Annotation")
        }
      }

      if (length(hitlist) == 0) {
        stop("No HITs found for HITType")
      }

      if (!is.null(status)) {
        if (!all(status %in% c("Approved", "Rejected", "Submitted"))) {
          stop(
            "Status must be vector containing one or more of: 'Approved', 'Rejected', 'Submitted'"
          )
        }
      } else {
        status <- c("Approved", "Rejected", "Submitted")
      }


      batch_helper_list_assignments <-
        function(batchhit,
                 pagetoken = NULL,
                 num_retries = 1) {
          if (!is.null(pagetoken)) {
            # Use page token if given
            response <-
              try(pyMTurkR$Client$list_assignments_for_hit(
                HITId = batchhit,
                NextToken = pagetoken,
                MaxResults = as.integer(results),
                AssignmentStatuses = as.list(status)
              ),
              silent = !verbose)
          } else {
            response <-
              try(pyMTurkR$Client$list_assignments_for_hit(
                HITId = batchhit,
                MaxResults = as.integer(results),
                AssignmentStatuses = as.list(status)
              ),
              silent = !verbose)
          }

          # Validity check response
          if (class(response) == "try-error" & persist.on.error) {
            # If the response was an error, then we should try again
            # but stop after 5 attempts
            message("  Error. Trying again. Attempt #",
                    num_retries,
                    " for HIT: ",
                    batchhit)
            message("  Waiting a few seconds before retrying...")
            Sys.sleep(5)
            num_retries <- num_retries + 1
            response <-
              batch_helper_list_assignments(
                batchhit = batchhit,
                pagetoken = pagetoken,
                num_retries = num_retries
              )
            if (num_retries >= 5) {
              stop(
                paste0(
                  "Failed after 5 attempts to fetch list of assignments for HIT: ",
                  batchhit
                )
              )
            }
          } else {
            return(response)
          }

        }


      # Batch process function
      batch <- function(batchhit, pagetoken = NULL) {
        response <-
          batch_helper_list_assignments(batchhit = batchhit, pagetoken = pagetoken)
        assignments <- response$Assignments
        tmpAssignments <- NULL
        tmpAnswers <- NULL

        # For each assignment...
        if (length(assignments) > 0) {
          for (i in 1:length(assignments)) {
            tmp <- ToDataFrameAssignment(assignments[[i]])
            tmpAssignments <- rbind(tmpAssignments, tmp$assignments)
            tmpAnswers <- rbind(tmpAnswers, tmp$answers)

            if (verbose) {
              message("Assignment ",
                      assignments[[i]]$AssignmentId,
                      " Retrieved")
            }
          }
        } else {
          return(NULL)
        }

        # Update page token
        if (!is.null(response$NextToken)) {
          pagetoken <- response$NextToken
        }
        # Update page token
        if (!is.null(response$NumResults)) {
          numresults <- response$NumResults
        } else {
          numresults <- 0
        }

        return(
          list(
            Assignments = tmpAssignments,
            Answers = tmpAnswers,
            NumResults = numresults,
            NextToken = pagetoken
          )
        )

      }


      # Keep a running total of all Assignments fetched
      runningtotal <- 0

      Assignments <-
        emptydf(
          nrow = 0,
          ncol = 11,
          c(
            'AssignmentId',
            'WorkerId',
            'HITId',
            'AssignmentStatus',
            'AutoApprovalTime',
            'AcceptTime',
            'SubmitTime',
            'ApprovalTime',
            'RejectionTime',
            'RequesterFeedback',
            'Answer'
          )
        )

      Answers <-
        emptydf(
          0,
          9,
          c(
            "AssignmentId",
            "WorkerId",
            "HITId",
            "QuestionIdentifier",
            "FreeText",
            "SelectionIdentifier",
            "OtherSelectionField",
            "UploadedFileKey",
            "UploadedFileSizeInBytes"
          )
        )

      total.results.found <- 0

      # Run batch over hitlist
      for (i in 1:length(hitlist)) {
        hit <- hitlist[i]
        pagetoken <- NULL

        # Fetch first page
        response <- batch(hit, pagetoken)
        if (!is.null(response)) {
          results.found <- response$NumResults
        } else {
          results.found <- 0
        }
        total.results.found <- total.results.found + results.found
        to.return <- response
        pagetoken <- response$NextToken

        Assignments <- rbind(Assignments, to.return$Assignments)
        Answers <- rbind(Answers, to.return$Answers)

        # if (total.results.found >= results) {
        #   break
        # }

        while (!is.null(response$NextToken) # &
               # results > total.results.found
               ) {
          response <- batch(hit, pagetoken)
          results.found <- response$NumResults
          to.return <- response

          # Update if response found results
          if (!is.null(response)) {
            pagetoken <- response$NextToken
          } else {
            results.found <- 0
          }
          total.results.found <- total.results.found + results.found

          Assignments <- rbind(Assignments, to.return$Assignments)
          Answers <- rbind(Answers, to.return$Answers)

        }

      }

    }

    if (verbose) {
      message("\n", nrow(Assignments), " Assignments Retrieved")
    }
    Assignments$Answer <- NULL
    if (get.answers == TRUE) {
      return(list(Assignments = Assignments, Answers = Answers))
    } else {
      return(Assignments)
    }

  }
	get_all_assignments2 <-
	function(assignment = NULL,
	hit = NULL,
	hit.type = NULL,
	annotation = NULL,
	status = NULL,
	results = as.integer(100),
	pagetoken = NULL,
	get.answers = FALSE,
	persist.on.error = FALSE,
	verbose = getOption('pyMTurkR.verbose', TRUE)) {
	GetClient() # Boto3 client

	if (as.numeric(results) < 1 \|\| as.numeric(results) > 100) {
	stop("'pagesize' must be in range (1 to 100)")
	}

	# Check that one of the params for lookup was provided
	if (all(is.null(assignment) & is.null(hit) &
	is.null(hit.type) & is.null(annotation))) {
	stop("Must provide 'assignment' xor 'hit' xor 'hit.type' xor 'annotation'")

	} else if (!is.null(assignment)) {
	# Lookup by assignments

	# For each assignment...
	for (i in 1:length(assignment)) {
	response <-
	try(pyMTurkR$Client$get_assignment(AssignmentId = assignment[i]),
	silent = !verbose)
	QualificationRequirements <- list()

	if (class(response) != "try-error") {
	tmp <- ToDataFrameAssignment(response$Assignment)
	a <- tmp$assignments
	ans <- tmp$answers

	if (i == 1) {
	Assignments <- a
	Answers <- ans

	} else {
	Assignments <- rbind(Assignments, a)
	Answers <- rbind(Answers, ans)
	}

	if (verbose) {
	message("Assignment ", assignment[i], " Retrieved")
	}
	}
	}


	} else {
	# Search for HITs that match criteria given

	if (!is.null(hit)) {
	# First we need to get a list of HITs
	if (is.factor(hit)) {

	hit <- as.character(hit)
	}
	hitlist <- hit
	} else if (!is.null(hit.type)) {

	# Search by HIT Type
	if (is.factor(hit.type)) {
	hit.type <- as.character(hit.type)
	}

	hitsearch <- SearchHITs(verbose = FALSE)
	hitlist <-
	hitsearch$HITs$HITId[hitsearch$HITs$HITTypeId %in% hit.type]

	if (length(hitlist) == 0) {
	stop("No HITs found matching HITTypeId")
	}

	} else if (!is.null(annotation)) {

	# Search by HIT Annotation
	if (is.na(annotation)) {
	stop("Annotation is NA")
	}

	if (is.factor(annotation)) {
	annotation <- as.character(annotation)
	}

	hitsearch <- SearchHITs(verbose = FALSE)
	hitlist <-
	hitsearch$HITs$HITId[grepl(annotation, hitsearch$HITs$RequesterAnnotation)]

	if (length(hitlist) == 0) {
	stop("No HITs found matching Annotation")
	}
	}

	if (length(hitlist) == 0) {
	stop("No HITs found for HITType")
	}

	if (!is.null(status)) {
	if (!all(status %in% c("Approved", "Rejected", "Submitted"))) {
	stop(
	"Status must be vector containing one or more of: 'Approved', 'Rejected', 'Submitted'"
	)
	}
	} else {
	status <- c("Approved", "Rejected", "Submitted")
	}



	batch_helper_list_assignments <-
	function(batchhit,
	pagetoken = NULL,
	num_retries = 1) {
	if (!is.null(pagetoken)) {
	# Use page token if given
	response <-
	try(pyMTurkR$Client$list_assignments_for_hit(
	HITId = batchhit,
	NextToken = pagetoken,
	MaxResults = as.integer(results),
	AssignmentStatuses = as.list(status)
	),
	silent = !verbose)
	} else {
	response <-
	try(pyMTurkR$Client$list_assignments_for_hit(
	HITId = batchhit,
	MaxResults = as.integer(results),
	AssignmentStatuses = as.list(status)
	),
	silent = !verbose)
	}

	# Validity check response
	if (class(response) == "try-error" & persist.on.error) {
	# If the response was an error, then we should try again
	# but stop after 5 attempts
	message(" Error. Trying again. Attempt #",
	num_retries,
	" for HIT: ",
	batchhit)
	message(" Waiting a few seconds before retrying...")
	Sys.sleep(5)
	num_retries <- num_retries + 1
	response <-
	batch_helper_list_assignments(
	batchhit = batchhit,
	pagetoken = pagetoken,
	num_retries = num_retries
	)
	if (num_retries >= 5) {
	stop(
	paste0(
	"Failed after 5 attempts to fetch list of assignments for HIT: ",
	batchhit
	)
	)
	}
	} else {
	return(response)
	}

	}


	# Batch process function
	batch <- function(batchhit, pagetoken = NULL) {
	response <-
	batch_helper_list_assignments(batchhit = batchhit, pagetoken = pagetoken)
	assignments <- response$Assignments
	tmpAssignments <- NULL
	tmpAnswers <- NULL

	# For each assignment...
	if (length(assignments) > 0) {
	for (i in 1:length(assignments)) {
	tmp <- ToDataFrameAssignment(assignments[[i]])
	tmpAssignments <- rbind(tmpAssignments, tmp$assignments)
	tmpAnswers <- rbind(tmpAnswers, tmp$answers)

	if (verbose) {
	message("Assignment ",
	assignments[[i]]$AssignmentId,
	" Retrieved")
	}
	}
	} else {
	return(NULL)
	}

	# Update page token
	if (!is.null(response$NextToken)) {
	pagetoken <- response$NextToken
	}
	# Update page token
	if (!is.null(response$NumResults)) {
	numresults <- response$NumResults
	} else {
	numresults <- 0
	}

	return(
	list(
	Assignments = tmpAssignments,
	Answers = tmpAnswers,
	NumResults = numresults,
	NextToken = pagetoken
	)
	)

	}


	# Keep a running total of all Assignments fetched
	runningtotal <- 0

	Assignments <-
	emptydf(
	nrow = 0,
	ncol = 11,
	c(
	'AssignmentId',
	'WorkerId',
	'HITId',
	'AssignmentStatus',
	'AutoApprovalTime',
	'AcceptTime',
	'SubmitTime',
	'ApprovalTime',
	'RejectionTime',
	'RequesterFeedback',
	'Answer'
	)
	)

	Answers <-
	emptydf(
	0,
	9,
	c(
	"AssignmentId",
	"WorkerId",
	"HITId",
	"QuestionIdentifier",
	"FreeText",
	"SelectionIdentifier",
	"OtherSelectionField",
	"UploadedFileKey",
	"UploadedFileSizeInBytes"
	)
	)

	total.results.found <- 0

	# Run batch over hitlist
	for (i in 1:length(hitlist)) {
	hit <- hitlist[i]
	pagetoken <- NULL

	# Fetch first page
	response <- batch(hit, pagetoken)
	if (!is.null(response)) {
	results.found <- response$NumResults
	} else {
	results.found <- 0
	}
	total.results.found <- total.results.found + results.found
	to.return <- response
	pagetoken <- response$NextToken

	Assignments <- rbind(Assignments, to.return$Assignments)
	Answers <- rbind(Answers, to.return$Answers)

	# if (total.results.found >= results) {
	# break
	# }

	while (!is.null(response$NextToken) # &
	# results > total.results.found
	) {
	response <- batch(hit, pagetoken)
	results.found <- response$NumResults
	to.return <- response

	# Update if response found results
	if (!is.null(response)) {
	pagetoken <- response$NextToken
	} else {
	results.found <- 0
	}
	total.results.found <- total.results.found + results.found

	Assignments <- rbind(Assignments, to.return$Assignments)
	Answers <- rbind(Answers, to.return$Answers)

	}

	}

	}

	if (verbose) {
	message("\n", nrow(Assignments), " Assignments Retrieved")
	}
	Assignments$Answer <- NULL
	if (get.answers == TRUE) {
	return(list(Assignments = Assignments, Answers = Answers))
	} else {
	return(Assignments)
	}

	}