Created
December 15, 2019 02:28
-
-
Save dalevross/3ecf86294e69f0816a66b3c2f3639d6e to your computer and use it in GitHub Desktop.
Key Value Pair Service on Google App Engine
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys
import re  # Regular expressions module
import cPickle

# Converts word-list files into pickled {word: definition} dictionaries.
#
# Usage: pass one or more dictionary files as command-line arguments. For each
# file a "<name>.pickle" file is written to the parent directory ("..\\"),
# where <name> is the input file name without its extension.
#
# Example input lines:
#   AA rough, cindery lava [n -S]
#   AB an abdominal muscle [n -S]
#   AD an {advertisement=n} [n -S]
#   AE one [adj]
#   AG {agriculture=n} [n -S]

# Dictionary name as found in a backslash-separated path: the first "\name."
# occurrence. (?P<dicname>...) names the group for retrieval later.
_BACKSLASH_NAME_RE = re.compile(r"\\(?P<dicname>\w+)\.")

# A word (\w+), followed by a possible space (\s?), followed by the
# definition (.*). "." does not match the newline, so the definition never
# carries the line terminator.
_ENTRY_RE = re.compile(r"(?P<word>\w+)\s?(?P<definition>.*)")

# Get all but the first argument; this script accepts a list of dictionary files
arguments = sys.argv[1:]

# For each file that you pass to the script
for filename in arguments:
    dictionary = {}

    # Use the file name without extension as the dictionary name.
    name_match = _BACKSLASH_NAME_RE.search(filename)
    if name_match is not None:
        dictname = name_match.group('dicname')
    else:
        # No "\name." in the argument (bare file name or "/"-separated path):
        # take the last path component up to its first dot instead of
        # crashing on a failed match.
        basename = re.split(r"[\\/]", filename)[-1]
        dictname = basename.partition(".")[0] or basename

    # Open the current file; the with-block closes it even if a line fails.
    with open(filename, "r") as dictionaryFile:
        # For each line in the dictionary file
        for line in dictionaryFile:
            match = _ENTRY_RE.match(line)
            if match is not None:
                # If there is a match, add the word and its definition
                dictionary[match.group('word')] = match.group('definition')
            else:
                # Otherwise, report the invalid line (e.g. a blank line or
                # one that does not start with a word character).
                print("This line is invalid in file for " + dictname + ": " + line)

    # Create a file of type pickle, and dump the dictionary to it using the
    # cPickle module
    with open("..\\" + dictname + ".pickle", "wb") as mfile:
        cPickle.dump(dictionary, mfile)

    print('Processed ' + dictname)

print('Done')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment