nmalkin/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Sample grading script (for Prolific surveys and others)

This is an example grading script I use for more easily reviewing survey responses. It spins up a webpage showing all the responses that still need to be reviewed, with Approve/Reject buttons for each one.
This particular script assumes the survey being graded is being run on the Prolific platform. It therefore looks for a separate Prolific export file (in ~/Downloads) and joins it with the provided data file (on the PROLIFIC_PID field) in order to only show responses that (1) are associated with a Prolific submission and (2) have not already been graded. This portion can be removed, if needed.
When you approve or reject a response through this script, the associated ID gets added to approved.txt or rejected.txt, respectively. The contents of these files can then be copied into the bulk action interface on Prolific.
Prerequisites

Install Python (3.6+) and the dependencies listed in requirements.txt (pip install -r requirements.txt)
Setup


Name your data file data.csv or replace the get_data function with a way to obtain the data.
Update QUESTION_IDS at the top of the script with the column names from your data.csv that you want to see on the webpage.
Ensure the Prolific export is in your Downloads folder, or update the leave_only_valid_responses function to pass the data through unfiltered.

Run

python grade.py

  
## grade.py
from datetime import datetime
from pathlib import Path
from string import Template

import pandas as pd
from bottle import route, run, post, static_file
from markdown import markdown

# Columns of data.csv whose answers are shown for each response on the
# grading page; replace these placeholders with your own column names.
QUESTION_IDS = [
    "question_a",
    "question_b",
]


def get_data():
    """Load the survey responses to be graded.

    Reads ``data.csv`` from the current working directory.

    Returns:
        A DataFrame holding the parsed contents of ``data.csv``.
    """
    return pd.read_csv("data.csv")


def get_approved():
    """Return the set of IDs that have already been approved.

    IDs are read from ``approved.txt`` (one per line) in the current working
    directory. That file is only created when the first response is approved
    (it is opened in append mode by the approve handler), so a missing file
    means "nothing approved yet" and yields an empty set instead of an error.

    Returns:
        A set containing the stripped, non-empty lines of ``approved.txt``.
    """
    try:
        with open("approved.txt", "r") as f:
            # Blank lines carry no ID, so they are left out of the set.
            return {line.strip() for line in f if line.strip()}
    except FileNotFoundError:
        return set()


def get_rejected():
    """Return the set of IDs that have already been rejected.

    IDs are read from ``rejected.txt`` (one per line) in the current working
    directory. That file is only created when the first response is rejected
    (it is opened in append mode by the reject handler), so a missing file
    means "nothing rejected yet" and yields an empty set instead of an error.

    Returns:
        A set containing the stripped, non-empty lines of ``rejected.txt``.
    """
    try:
        with open("rejected.txt", "r") as f:
            # Blank lines carry no ID, so they are left out of the set.
            return {line.strip() for line in f if line.strip()}
    except FileNotFoundError:
        return set()


def get_prolific_data():
    """Load the most recently modified Prolific export from ~/Downloads.

    Looks for files matching ``prolific_export_*.csv`` in the user's
    ``Downloads`` directory and parses the one with the latest modification
    time, on the assumption that it is the newest export.

    Returns:
        A DataFrame with the contents of the chosen export file.

    Raises:
        RuntimeError: If no matching export file is found.
    """
    downloads = Path("~/Downloads").expanduser()
    candidates = list(downloads.glob("prolific_export_*.csv"))
    if not candidates:
        raise RuntimeError("couldn't find Prolific export files on Downloads")

    # Order oldest-to-newest by modification time; the last entry is the newest.
    candidates.sort(key=lambda path: path.stat().st_mtime)
    newest = candidates[-1]

    return pd.read_csv(newest)


def leave_only_valid_responses(data):
    """Keep only the responses that Prolific lists as awaiting review.

    The responses are left-joined with the latest Prolific export on
    ``PROLIFIC_PID``. Rows whose ``status`` is not exactly
    ``"AWAITING REVIEW"`` are dropped; that includes responses without a
    matching Prolific submission, because the left join leaves their
    ``status`` empty.

    Args:
        data: DataFrame of survey responses with a ``PROLIFIC_PID`` column.

    Returns:
        The joined DataFrame restricted to rows awaiting review.
    """
    export = get_prolific_data()

    # Attach the Prolific columns (including ``status``) to every response.
    key = "PROLIFIC_PID"
    merged = data.merge(export, how="left", left_on=key, right_on=key)

    awaiting_review = merged["status"] == "AWAITING REVIEW"
    return merged[awaiting_review]


# Page skeleton filled in via string.Template: $title goes into the <title>
# element and $content into the #content div. The <head> loads the simple.css
# stylesheet and the htmx script from unpkg, each pinned with an integrity hash.
TEMPLATE = """
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>$title</title>
    <link
      rel="stylesheet"
      href="https://unpkg.com/simpledotcss@2.1.1/simple.min.css"
      integrity="sha384-qC703vPcX6cOHrtWqLaQlbDlZPbQwvoNRx745QTRM8EdQLH9P37LqhmdYUF09dQT"
      crossorigin="anonymous"
    />
    <script
      src="https://unpkg.com/htmx.org@1.8.0"
      integrity="sha384-cZuAZ+ZbwkNRnrKi05G/fjBX+azI9DNOkNYysZ0I/X5ZFgsmMiBXgDZof30F5ofc"
      crossorigin="anonymous"
    ></script>
  </head>
  <body>
    <div id="content">$content</div>
  </body>
</html>
"""


def make_grading_html() -> str:
    """Build the HTML page listing every response that still needs grading.

    The page contains a header with the total row count and a timestamp, a
    table of duplicated ``PROLIFIC_PID`` values (computed on the raw data), and
    one section per pending response showing the ``QUESTION_IDS`` answers with
    Approve/Reject buttons. A response is pending when it survives
    ``leave_only_valid_responses`` and its ID is in neither the approved nor
    the rejected set.

    Participant-supplied text (IDs, answers) is HTML-escaped before being
    embedded, because Markdown passes raw HTML through unchanged and the page
    hosts buttons that approve submissions. Markdown syntax inside answers is
    still interpreted.

    Returns:
        The complete HTML document as a string.

    Raises:
        KeyError: If a name in ``QUESTION_IDS`` is not a column of the data.
    """
    # Imported locally: the module-level imports do not provide it.
    from html import escape

    data = get_data()

    parts = [
        f"""
# Free response answers that still need to be graded

_n={len(data)}, last updated: {datetime.now()}_

"""
    ]

    # Flag duplicates (on the raw data, before any filtering)
    duplicates: pd.Series = data[data["PROLIFIC_PID"].duplicated()]["PROLIFIC_PID"]
    parts.append(f"## Duplicates\n{duplicates.to_frame().to_html()}\n")

    # Filter out invalid responses
    data = leave_only_valid_responses(data)

    # Filter out those we've dealt with
    graded = get_approved() | get_rejected()
    data = data[~data["PROLIFIC_PID"].isin(graded)]

    # Prepare to display results
    parts.append(f"## Responses\n_{len(data)} left to grade_\n")

    # Iterate over plain dicts rather than itertuples(): namedtuple fields are
    # renamed positionally when a column name is not a valid identifier, which
    # would make lookups by the original column name fail.
    for record in data.to_dict("records"):
        # The ID ends up inside HTML attributes, so quotes are escaped too.
        pid = escape(str(record["PROLIFIC_PID"]))
        current = f"#### {pid}\n"

        for q in QUESTION_IDS:
            response = record[q]

            if pd.isnull(response):
                continue

            current += f"**{escape(q, quote=False)}**\n"
            current += f"{escape(str(response), quote=False)} \n\n"

        buttons = f"""<div id='buttons-{pid}'>
        <button hx-post="/approve/{pid}"
            hx-trigger="click"
            hx-target="#buttons-{pid}"
            hx-swap="outerHTML"
            style="background-color: green"
            >
            Approve
        </button>
        <button hx-post="/reject/{pid}"
            hx-trigger="click"
            hx-target="#buttons-{pid}"
            hx-swap="outerHTML"
            style="background-color: red"
            >
            Reject
        </button>
        </div>"""

        parts.append(f"{current}\n{buttons}\n----\n")

    body_html = markdown("".join(parts))
    document = Template(TEMPLATE).substitute(title="Pending results", content=body_html)
    return document


@route("/")
def show_pending_completions():
    """Serve the grading page at the site root.

    Returns:
        The HTML document produced by ``make_grading_html``, rebuilt on every
        request.
    """
    page = make_grading_html()
    return page


@post("/approve/<id>")
def approve_response(id):
    """Record an approval for the response with the given ID.

    Appends the ID as a new line to ``approved.txt``.

    Args:
        id: The ID taken from the request URL.

    Returns:
        An HTML snippet with the word "Approved" in green.
    """
    with open("approved.txt", "a") as log:
        log.write(f"{id}\n")
    return '<span style="color:green">Approved</span>'


@post("/reject/<id>")
def reject_response(id):
    """Record a rejection for the response with the given ID.

    Appends the ID as a new line to ``rejected.txt``.

    Args:
        id: The ID taken from the request URL.

    Returns:
        An HTML snippet with the word "Rejected" in red.
    """
    with open("rejected.txt", "a") as log:
        log.write(f"{id}\n")
    return '<span style="color:red">Rejected</span>'


@route("/static/<filename>")
def server_static(filename):
    """Serve a file from the local ``static`` directory.

    Args:
        filename: Name of the requested file, taken from the request URL.

    Returns:
        Whatever ``static_file`` produces for that file under ``static``.
    """
    static_root = "static"
    return static_file(filename, root=static_root)


# Port the local grading server listens on.
PORT = 8080

# Start the server on localhost with debug mode and the reloader enabled.
run(
    reloader=True,
    debug=True,
    port=PORT,
    host="localhost",
)

## requirements.txt
bottle==0.12.23
Markdown==3.4.1
pandas==1.5.0
	from datetime import datetime
	from pathlib import Path
	from string import Template

	import pandas as pd
	from bottle import route, run, post, static_file
	from markdown import markdown

	QUESTION_IDS = [
	"question_a",
	"question_b",
	]


	def get_data():
	data = pd.read_csv("data.csv")
	return data


	def get_approved():
	with open("approved.txt", "r") as f:
	return set([line.strip() for line in f.readlines()])


	def get_rejected():
	with open("rejected.txt", "r") as f:
	return set([line.strip() for line in f.readlines()])


	def get_prolific_data():
	# Find all Prolific export files
	export_files = list(Path("~/Downloads").expanduser().glob("prolific_export_*.csv"))
	if len(export_files) == 0:
	raise RuntimeError("couldn't find Prolific export files on Downloads")

	# Pick the export file that was last modified, assuming it's the latest one and should be used
	sorted_export = sorted(export_files, key=lambda f: f.stat().st_mtime)
	prolific_file = sorted_export[-1]

	prolific_data = pd.read_csv(prolific_file)
	return prolific_data


	def leave_only_valid_responses(data):
	# Load Prolific export
	prolific_data = get_prolific_data()

	# Join data with Prolific export
	joined = pd.merge(
	data,
	prolific_data,
	left_on="PROLIFIC_PID",
	right_on="PROLIFIC_PID",
	how="left",
	)

	# Retain only ungraded or unknown (according to Prolific)
	filtered_data = joined[(joined["status"] == "AWAITING REVIEW")]
	return filtered_data


	TEMPLATE = """
	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8" />
	<meta name="viewport" content="width=device-width, initial-scale=1.0" />
	<title>$title</title>
	<link
	rel="stylesheet"
	href="https://unpkg.com/simpledotcss@2.1.1/simple.min.css"
	integrity="sha384-qC703vPcX6cOHrtWqLaQlbDlZPbQwvoNRx745QTRM8EdQLH9P37LqhmdYUF09dQT"
	crossorigin="anonymous"
	/>
	<script
	src="https://unpkg.com/htmx.org@1.8.0"
	integrity="sha384-cZuAZ+ZbwkNRnrKi05G/fjBX+azI9DNOkNYysZ0I/X5ZFgsmMiBXgDZof30F5ofc"
	crossorigin="anonymous"
	></script>
	</head>
	<body>
	<div id="content">$content</div>
	</body>
	</html>
	"""


	def make_grading_html() -> str:
	data = get_data()

	text = f"""
	# Free response answers that still need to be graded

	_n={len(data)}, last updated: {datetime.now()}_

	"""
	# Flag duplicates
	duplicates: pd.Series = data[data["PROLIFIC_PID"].duplicated()]["PROLIFIC_PID"]
	text += f"## Duplicates\n{duplicates.to_frame().to_html()}\n"

	# Filter out invalid responses
	data = leave_only_valid_responses(data)

	# Filter out those we've dealt with
	data = data[
	~(
	data["PROLIFIC_PID"].isin(get_approved())
	\| data["PROLIFIC_PID"].isin(get_rejected())
	)
	]

	# Prepare to display results
	text += f"## Responses\n_{len(data)} left to grade_\n"

	for row in data.itertuples():
	id = getattr(row, "PROLIFIC_PID")
	current = f"#### {id}\n"

	for q in QUESTION_IDS:
	response = getattr(row, q)

	if pd.isnull(response):
	continue

	current += f"{q}\n"
	current += f"{response} \n\n"

	buttons = f"""<div id='buttons-{id}'>
	<button hx-post="/approve/{id}"
	hx-trigger="click"
	hx-target="#buttons-{id}"
	hx-swap="outerHTML"
	style="background-color: green"
	>
	Approve
	</button>
	<button hx-post="/reject/{id}"
	hx-trigger="click"
	hx-target="#buttons-{id}"
	hx-swap="outerHTML"
	style="background-color: red"
	>
	Reject
	</button>
	</div>"""

	text += f"{current}\n{buttons}\n----\n"

	html = markdown(text)
	document = Template(TEMPLATE).substitute(title="Pending results", content=html)
	return document


	@route("/")
	def show_pending_completions():
	return make_grading_html()


	@post("/approve/<id>")
	def approve_response(id):
	with open("approved.txt", "a") as f:
	f.write(id)
	f.write("\n")
	return '<span style="color:green">Approved</span>'


	@post("/reject/<id>")
	def reject_response(id):
	with open("rejected.txt", "a") as f:
	f.write(id)
	f.write("\n")
	return '<span style="color:red">Rejected</span>'


	@route("/static/<filename>")
	def server_static(filename):
	return static_file(filename, root="static")


	PORT = 8080
	run(host="localhost", port=PORT, debug=True, reloader=True)