Skip to content

Instantly share code, notes, and snippets.

@nmalkin
Created September 22, 2022 23:57
Show Gist options
  • Save nmalkin/9bd7a28fbec3ac5a827a3881d2c594b1 to your computer and use it in GitHub Desktop.
Save nmalkin/9bd7a28fbec3ac5a827a3881d2c594b1 to your computer and use it in GitHub Desktop.
Sample grading script (for Prolific surveys and others)

Sample grading script (for Prolific surveys and others)

This is an example grading script I use for more easily reviewing survey responses. It spins up a webpage showing all the responses that still need to be reviewed, with Approve/Reject buttons for each one.

This particular script assumes the survey being graded is being run on the Prolific platform. It therefore looks for a separate Prolific export file (in ~/Downloads) and joins it with the provided data file (on the PROLIFIC_PID field) in order to only show responses that (1) are associated with a Prolific submission and (2) have not already been graded. This portion can be removed, if needed.

When you approve or reject a response through this script, the associated ID gets added to approved.txt or rejected.txt, respectively. The contents of these files can then be copied into the bulk action interface on Prolific.

Prerequisites

Install Python (3.6+) and the dependencies listed in requirements.txt (pip install -r requirements.txt)

Setup

  1. Name your data file data.csv or replace the get_data function with a way to obtain the data.
  2. Update QUESTION_IDS at the top of the script with the column names from your data.csv that you want to see on the webpage.
  3. Ensure the Prolific export is in your Downloads, or update the leave_only_valid_responses function to pass through data.

Run

python grade.py

from datetime import datetime
from pathlib import Path
from string import Template
import pandas as pd
from bottle import route, run, post, static_file
from markdown import markdown
# Names of the data columns whose answers are displayed for each response
# on the grading page.
QUESTION_IDS = ["question_a", "question_b"]
def get_data():
    """Load the survey responses from ``data.csv`` in the working directory.

    Returns:
        A pandas DataFrame with the contents of the CSV file.
    """
    return pd.read_csv("data.csv")
def get_approved():
    """Return the set of IDs recorded in ``approved.txt``.

    The file holds one ID per line. It only comes into existence once the
    first response has been approved, so a missing file is treated as
    "nothing approved yet" rather than an error.

    Returns:
        A set of stripped, non-empty lines; empty if the file is missing.
    """
    try:
        with open("approved.txt", "r") as f:
            # Skip blank lines so "" never ends up in the set.
            return {line.strip() for line in f if line.strip()}
    except FileNotFoundError:
        return set()
def get_rejected():
    """Return the set of IDs recorded in ``rejected.txt``.

    The file holds one ID per line. It only comes into existence once the
    first response has been rejected, so a missing file is treated as
    "nothing rejected yet" rather than an error.

    Returns:
        A set of stripped, non-empty lines; empty if the file is missing.
    """
    try:
        with open("rejected.txt", "r") as f:
            # Skip blank lines so "" never ends up in the set.
            return {line.strip() for line in f if line.strip()}
    except FileNotFoundError:
        return set()
def get_prolific_data():
    """Read the most recently modified Prolific export from ``~/Downloads``.

    Candidate files are those matching ``prolific_export_*.csv``. The one
    with the latest modification time is assumed to be the current export.

    Returns:
        A pandas DataFrame with the contents of the chosen export file.

    Raises:
        RuntimeError: If no matching export file exists.
    """
    downloads_dir = Path("~/Downloads").expanduser()
    candidates = [*downloads_dir.glob("prolific_export_*.csv")]
    if not candidates:
        raise RuntimeError("couldn't find Prolific export files on Downloads")

    # Order oldest -> newest by modification time; the last entry wins.
    candidates.sort(key=lambda path: path.stat().st_mtime)
    newest_export = candidates[-1]
    return pd.read_csv(newest_export)
def leave_only_valid_responses(data):
    """Keep only the responses that Prolific lists as awaiting review.

    The responses are left-joined with the latest Prolific export on the
    ``PROLIFIC_PID`` column, then restricted to rows whose ``status`` equals
    ``"AWAITING REVIEW"``. Rows with no match in the export have a missing
    status after the join and are therefore dropped as well.

    Args:
        data: DataFrame of survey responses containing a ``PROLIFIC_PID``
            column.

    Returns:
        The joined DataFrame restricted to rows awaiting review.
    """
    export = get_prolific_data()
    merged = data.merge(
        export,
        how="left",
        left_on="PROLIFIC_PID",
        right_on="PROLIFIC_PID",
    )
    awaiting_review = merged["status"].eq("AWAITING REVIEW")
    return merged.loc[awaiting_review]
# HTML page skeleton, filled in with string.Template: `$title` becomes the
# page title and `$content` the rendered page body. The page pulls in the
# simple.css stylesheet and the htmx script from unpkg, each pinned to a
# version and guarded by a subresource-integrity hash.
TEMPLATE = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>$title</title>
<link
rel="stylesheet"
href="https://unpkg.com/simpledotcss@2.1.1/simple.min.css"
integrity="sha384-qC703vPcX6cOHrtWqLaQlbDlZPbQwvoNRx745QTRM8EdQLH9P37LqhmdYUF09dQT"
crossorigin="anonymous"
/>
<script
src="https://unpkg.com/htmx.org@1.8.0"
integrity="sha384-cZuAZ+ZbwkNRnrKi05G/fjBX+azI9DNOkNYysZ0I/X5ZFgsmMiBXgDZof30F5ofc"
crossorigin="anonymous"
></script>
</head>
<body>
<div id="content">$content</div>
</body>
</html>
"""
def _grading_buttons(pid: str) -> str:
    """Return the Approve/Reject button pair for one response.

    Each button issues an htmx POST and replaces the whole button container
    with the server's reply. ``pid`` must already be HTML-escaped.
    """
    return f"""<div id='buttons-{pid}'>
<button hx-post="/approve/{pid}"
hx-trigger="click"
hx-target="#buttons-{pid}"
hx-swap="outerHTML"
style="background-color: green"
>
Approve
</button>
<button hx-post="/reject/{pid}"
hx-trigger="click"
hx-target="#buttons-{pid}"
hx-swap="outerHTML"
style="background-color: red"
>
Reject
</button>
</div>"""


def make_grading_html() -> str:
    """Build the full HTML page listing the responses still to be graded.

    The page contains a header with the total row count and a timestamp, a
    table of duplicated ``PROLIFIC_PID`` values, and one section per
    ungraded response showing its answers to ``QUESTION_IDS`` followed by
    Approve/Reject buttons.

    Participant IDs and answers are HTML-escaped before being embedded,
    because Markdown passes raw HTML through and survey answers are
    untrusted text. Markdown syntax inside an answer is still rendered.

    Returns:
        The complete HTML document as a string.
    """
    # Local import keeps this block self-contained.
    from html import escape

    data = get_data()
    text = f"""
# Free response answers that still need to be graded
_n={len(data)}, last updated: {datetime.now()}_
"""

    # Flag duplicates
    duplicates: pd.Series = data[data["PROLIFIC_PID"].duplicated()]["PROLIFIC_PID"]
    text += f"## Duplicates\n{duplicates.to_frame().to_html()}\n"

    # Filter out invalid responses
    data = leave_only_valid_responses(data)

    # Filter out those we've dealt with
    already_graded = get_approved() | get_rejected()
    data = data[~data["PROLIFIC_PID"].isin(already_graded)]

    # Prepare to display results
    sections = [f"## Responses\n_{len(data)} left to grade_\n"]
    # Iterate over plain dict records rather than itertuples(): namedtuple
    # fields are renamed when a column name is not a valid identifier, which
    # would break lookups by the names listed in QUESTION_IDS.
    for record in data.to_dict("records"):
        pid = escape(str(record["PROLIFIC_PID"]))
        parts = [f"#### {pid}\n"]
        for q in QUESTION_IDS:
            response = record[q]
            if pd.isnull(response):
                continue
            parts.append(f"**{q}**\n")
            parts.append(f"{escape(str(response))} \n\n")
        current = "".join(parts)
        sections.append(f"{current}\n{_grading_buttons(pid)}\n----\n")
    text += "".join(sections)

    rendered = markdown(text)
    return Template(TEMPLATE).substitute(title="Pending results", content=rendered)
@route("/")
def show_pending_completions():
    """Serve the grading page at the site root, built anew on each request."""
    page = make_grading_html()
    return page
@post("/approve/<id>")
def approve_response(id):
    """Record an approval by appending ``id`` as a new line to approved.txt.

    Returns:
        An HTML snippet confirming the approval.
    """
    with open("approved.txt", "a") as approved_log:
        approved_log.write(id + "\n")
    return '<span style="color:green">Approved</span>'
@post("/reject/<id>")
def reject_response(id):
    """Record a rejection by appending ``id`` as a new line to rejected.txt.

    Returns:
        An HTML snippet confirming the rejection.
    """
    with open("rejected.txt", "a") as rejected_log:
        rejected_log.write(id + "\n")
    return '<span style="color:red">Rejected</span>'
@route("/static/<filename>")
def server_static(filename):
    """Serve ``filename`` from the ``static`` directory."""
    response = static_file(filename, root="static")
    return response
# Port the grading server listens on.
PORT = 8080

# Start the Bottle server on localhost with debug mode and the
# auto-reloader switched on.
run(
    host="localhost",
    port=PORT,
    debug=True,
    reloader=True,
)
bottle==0.12.23
Markdown==3.4.1
pandas==1.5.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment