Skip to content

Instantly share code, notes, and snippets.

@misterhay
Last active January 23, 2021 21:01
Show Gist options
  • Save misterhay/9ffc00b119e89523974439657865fe89 to your computer and use it in GitHub Desktop.
Save misterhay/9ffc00b119e89523974439657865fe89 to your computer and use it in GitHub Desktop.
Parsing Brightspace assignment downloads and marking spelling and other conventions
import os
import pdfplumber
from spellchecker import SpellChecker
# Shared spell checker, built once and reused for every submission.
spell = SpellChecker()


def _student_from_path(path):
    """Return the student name embedded in a submission directory path.

    The name is the text between the first and second '- ' in *path*
    (presumably folders are named '<id> - <student> - <date>'; verify
    against an actual download). Falls back to *path* itself when the
    separator is absent, e.g. for files sitting directly in the walk root.
    """
    parts = path.split('- ')
    return parts[1].strip() if len(parts) > 1 else path


def _read_pdf_text(filepath):
    """Return the concatenated text of every page of the PDF at *filepath*."""
    with pdfplumber.open(filepath) as pdf:
        # extract_text() may give None for a page with no extractable text
        # (e.g. a scanned image); treat that as an empty page.
        return ''.join(
            page.extract_text(x_tolerance=3, y_tolerance=3) or ''
            for page in pdf.pages
        )


# Walk the current directory tree and check every supported submission file.
for path, _directories, files in os.walk('.'):
    for filename in files:
        # Build the path for every file, not just PDFs, so the .txt branch
        # never opens an undefined or stale path.
        filepath = os.path.join(path, filename)
        if filename.endswith('.pdf'):
            text = _read_pdf_text(filepath)
            print(_student_from_path(path), '-', filename)
            print(text)
        elif filename.endswith('.txt'):
            # Undecodable bytes are replaced so one oddly-encoded submission
            # does not abort the whole run.
            with open(filepath, 'r', encoding='utf-8', errors='replace') as textfile:
                text = textfile.read()
        else:
            # TODO: .docx is not handled yet. Skip it (and any other file
            # type) so the checks below never run on a previous file's text.
            continue
        # TODO: count words
        # spell check: unknown() expects an iterable of words; a raw string
        # would be iterated character by character, so tokenize first.
        misspelled = spell.unknown(spell.split_words(text))
        print(misspelled)
        # TODO: sentence lengths
        # TODO: sentence starters
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment