This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Inspired by "Quantifying origin and character of long-range correlations in narrative texts"
by Stanisław Drożdż, Paweł Oświęcimka, Andrzej Kulig, Jarosław Kwapień, Katarzyna Bazarnik,
Iwona Grabska-Gradzińska, Jan Rybicki, and Marek Stanuszek, this is an attempt to apply
the CLTK tokenizers to measure sentence lengths of works from the Greek and Latin corpora of the
Perseus Digital Library, for analysis via the methods used by the above researchers.
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pdb | |
import os, json, re | |
from bs4 import BeautifulSoup | |
import html.parser | |
import pymongo | |
from db import mongo | |
class PerseusToMongo: | |
# a class to migrate Perseus XML file data to mongo db |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pdb | |
import re | |
import string | |
import sys | |
class ScansionToHTML: | |
def __init__(self, line, scansion): | |
self.scansion = scansion |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Ignore configuration files that may contain sensitive information. | |
sites/*/settings*.php | |
# Ignore paths that contain user-generated content. | |
sites/*/files | |
sites/*/private | |
sites/*/~private | |
.svn | |
*.svn/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Word lists of Latin function words, all lower-case and written with 'u'
# where 'v' is often printed (e.g. 'uel', 'siue', 'uersus'), so any text
# compared against them needs the same normalisation.

# Conjunctions and connective particles.
conj_list = [
    'ac', 'at', 'atque', 'aut', 'et', 'ne', 'nec', 'non', 'sed', 'si', 'uel',
    'cum', 'quum', 'donec', 'dum', 'enim', 'enimuero', 'etiam', 'etsi', 'igitur',
    'itaque', 'nam', 'necnon', 'neque', 'nisi', 'postquam', 'quamquam', 'quamuis',
    'quando', 'que', 'quia', 'quin', 'quippe', 'quinetiam', 'quod', 'quodque',
    'siue', 'ut', 'tam', 'necdum',
]

# Prepositions. 'cum' is also present in conj_list, and 'sub' and 'super'
# each occur twice here; the repeats are kept so the list contents (length
# and positions) stay exactly as they were.
prep_list = [
    'ante', 'ad', 'circum', 'contra', 'inter', 'intra', 'post', 'in', 'en', 'praeter',
    'per', 'propter', 'super', 'uersus', 'extra', 'trans', 'sub', 'ob', 'a', 'ab',
    'de', 'cum', 'e', 'ex', 'sine', 'pro', 'prae', 'sub', 'super',
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pymongo | |
import json | |
import pdb | |
def mongo(db):
    """Return the database named *db* from a MongoDB client on localhost.

    A new ``pymongo.MongoClient`` is created on every call, pointed at
    ``localhost:27017``.

    Args:
        db: Name of the database to select on the client.

    Returns:
        The result of indexing the client with *db* (``client[db]``).
    """
    host = "localhost"
    port = 27017
    # PyMongo 3+ names this option ``maxPoolSize``; the PyMongo 2.x keyword
    # ``max_pool_size`` is rejected there as an unknown option.
    # NOTE(review): None is intended to mean "no upper bound on pooled
    # connections" -- confirm against the installed PyMongo version.
    client = pymongo.MongoClient(host, port, maxPoolSize=None)
    return client[db]
NewerOlder