Skip to content

Instantly share code, notes, and snippets.

@xavivars
Created October 14, 2014 21:39
Show Gist options
  • Save xavivars/38ecea31809d72081a81 to your computer and use it in GitHub Desktop.
Save xavivars/38ecea31809d72081a81 to your computer and use it in GitHub Desktop.
APY unknown performance
#!/usr/bin/env python3
# vim: set ts=4 sw=4 sts=4 et :
import sqlite3, re
from datetime import datetime
# SQLite connection shared by every noteUnknownToken() call. It starts out as
# None and is opened on the first call, then reused for all later ones.
missingFreqsDBConn = None
# Matches an unknown-word mark: a literal '*' followed by one or more
# characters that are not '.', ',', ';', ':', tab, '*' or space.
# Group 1 captures the word itself, without the leading '*'.
unknownMarkRE = re.compile(r'\*([^.,;:\t\* ]+)')
def stripUnknownMarks(text):
    """Return *text* with the '*' removed from every unknown-word mark.

    A mark is whatever the module-level ``unknownMarkRE`` matches; each match
    is replaced by its first capture group (the word without the asterisk).
    Text that contains no marks is returned unchanged.

    A timestamp is printed before and after the substitution as ad-hoc
    timing output.
    """
    # print() calls instead of Python 2 print statements: the shebang asks
    # for python3, where the statement form is a SyntaxError.
    print("[ str: ", datetime.now())
    stripped = unknownMarkRE.sub(r'\1', text)
    print("] str: ", datetime.now())
    return stripped
def noteUnknownTokens(text, pair):
    """Record every unknown-marked token found in *text* for *pair*.

    Each word captured by the module-level ``unknownMarkRE`` is passed, one
    at a time and in order of appearance, to ``noteUnknownToken`` together
    with *pair* and the fixed database path ``'this.db'``. Repeated tokens
    are passed once per occurrence.

    Timestamps (and the inputs themselves) are printed as ad-hoc timing
    output. Returns None.
    """
    # print() calls instead of Python 2 print statements: the shebang asks
    # for python3, where the statement form is a SyntaxError.
    print("[ re: ", datetime.now())
    print(pair, text)
    # With a single capture group, findall() yields the captured words
    # directly (without the leading '*').
    for token in unknownMarkRE.findall(text):
        print("->re: ", datetime.now())
        noteUnknownToken(token, pair, 'this.db')
    print("] re: ", datetime.now())
def noteUnknownToken(token, pair, dbPath):
    """Increment the stored frequency of *token* for language *pair*.

    The count lives in the ``missingFreqs`` table, which is created on demand
    and holds one row per (pair, token). A token seen for the first time is
    stored with frequency 1; otherwise its existing frequency is increased
    by one.

    The SQLite connection is opened on the first call and cached in the
    module-level ``missingFreqsDBConn``; *dbPath* is therefore only used by
    the call that opens the connection and is ignored afterwards.

    Every call commits its own transaction. Timestamps are printed before
    and after the database work as ad-hoc timing output. Returns None.
    """
    global missingFreqsDBConn
    # print() calls instead of Python 2 print statements: the shebang asks
    # for python3, where the statement form is a SyntaxError.
    print("[ sql: ", datetime.now())
    if missingFreqsDBConn is None:
        missingFreqsDBConn = sqlite3.connect(dbPath)
    # Using the connection as a context manager commits on success and rolls
    # back if either statement raises, so a failure cannot leave a
    # half-finished transaction open on the shared connection.
    with missingFreqsDBConn:
        missingFreqsDBConn.execute('CREATE TABLE IF NOT EXISTS missingFreqs (pair TEXT, token TEXT, frequency INTEGER, UNIQUE(pair, token))')
        # INSERT OR REPLACE relies on the UNIQUE(pair, token) constraint: the
        # sub-select reads the current count (0 when there is no row yet) and
        # the replaced row is written back with that count plus one.
        missingFreqsDBConn.execute('INSERT OR REPLACE INTO missingFreqs VALUES (:pair, :token, COALESCE((SELECT frequency FROM missingFreqs WHERE pair=:pair AND token=:token), 0) + 1)', {'pair': pair, 'token': token})
    print("] sql: ", datetime.now())
# Sample input for the timing run below: four marked words, three of them
# identical.
longText = '*sampleWord *sampleWord *sampleWord *sampleWsord'

if __name__ == '__main__':
    # Record the unknown tokens first, then print the text with its marks
    # stripped. print() is called as a function because the shebang asks for
    # python3, where the Python 2 print statement is a SyntaxError.
    noteUnknownTokens(longText, 'spa-cat')
    print(stripUnknownMarks(longText))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment