Created
March 30, 2013 12:46
-
-
Save pbartleby/5276544 to your computer and use it in GitHub Desktop.
Workflow example for "Distant Reading Alliteration in Latin Poetry". Presented at Word, Space, Time: Digital Perspectives on the Classical World (Digital Classics Association conference) on 4.6.13 @ U. Buffalo.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# alliteration_python_sample.py
"""
Workflow example for
Distant Reading Alliteration in Latin Poetry

Patrick J. Burns
Fordham University, Department of Classics

Word, Space, Time: Digital Perspectives on the Classical World
Digital Classics Association conference
4.6.13 @ U. Buffalo

The script reduces each line of a Latin text to the initial letters of its
words, then alphabetizes those initials line by line and prints the result.
It runs on Python 3 (it relies on str.maketrans and the print() function).
"""

import re      # Regular expressions, used to drop non-initial characters
import string  # Supplies string.punctuation for the punctuation filter

# READ
# Source text stored in a string.
source = """Arma virumque cano, Troiae qui primus ab oris
Italiam, fato profugus, Laviniaque venit
litora, multum ille et terris iactatus et alto"""

# PREPROCESS / DEFORM
# In Python 3, str.translate takes a single mapping; the third argument of
# str.maketrans lists the characters to delete.
deformed = source                                                          # Make copy of source to work on
deformed = deformed.lower()                                                # Make lowercase
deformed = deformed.translate(str.maketrans('', '', string.punctuation))   # Remove punctuation
deformed = deformed.translate(str.maketrans('', '', '0123456789'))         # Remove numbers
deformed = re.sub(r'(\B\w)', r'', deformed)                                # Remove non-initial characters
deformed = deformed.translate(str.maketrans('', '', ' '))                  # Remove spaces (newlines are kept)

##### Result for Virgil Aeneid 1.1:
##### 'Arma virumque cano, Troiae qui primus ab oris' -> avctqpao

# SPLIT / STORE
lines = deformed.split('\n')  # Split string by newline characters (\n) and store in 'lines'

##### Result for Virgil Aeneid 1.1-3, stored in 'lines':
##### ['avctqpao','ifplv','lmietiea']

# ITERATE / PROCESS
# As a sample process, here I will go through each line (i.e. item in the
# list 'lines') and alphabetize its contents.
alpha_lines = []  # Make empty list to store processed items
for line in lines:                              # ITERATE
    alpha_lines.append("".join(sorted(line)))   # PROCESS

##### Result for Virgil Aeneid 1.1-3, stored in 'alpha_lines':
##### ['avctqpao','ifplv','lmietiea'] -> ['aacopqtv', 'filpv', 'aeeiilmt']

# Output processed text
for line in alpha_lines:
    print(line)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment