Skip to content

Instantly share code, notes, and snippets.

View menzenski's full-sized avatar

Matt Menzenski menzenski

View GitHub Profile
@menzenski
menzenski / DimFinderClass.py
Last active December 25, 2015 20:39
Define a Python class and methods for locating diminutive nouns in a Russian text.
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import nltk
from nltk.stem import SnowballStemmer
from nltk import FreqDist
import codecs
def print_list(mylist):
'''Print a list containing unicode characters.'''
@menzenski
menzenski / yandexsearcher.py
Created October 18, 2013 05:15
Automate a search of slovari.yandex.ru
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import nltk
from nltk.stem import SnowballStemmer
from nltk import FreqDist
import codecs
import glob
from diminutivefinder_04_asclass import DiminutiveFinder
import matplotlib.pyplot as plt
@menzenski
menzenski / digitalhumanitiesproject.py
Last active December 25, 2015 20:39
Find specific diminutive nouns in a Russian text.
#! /usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import nltk
from nltk.stem import SnowballStemmer
from nltk import FreqDist
import codecs
import glob
from diminutivefinder_04_asclass import DiminutiveFinder
\newcommand{\trans}[1]{{\em #1}} % RUSSIAN TRANSLITERATION
\newcommand{\engl}[1]{`#1'} % ENGLISH TRANSLATION
\section{Discussion}
This small-scale analysis finds no significant difference between the durations
of the relative pronoun \trans{\v{s}to} and the interrogative pronoun
\trans{\v{s}to}, nor between the durations of the \trans{\v{s}to} of complex
conjunctions such as \trans{potomu \v{s}to} \engl{because} and the
\trans{\v{s}to} of the hypothetical subordinator \trans{\v{s}toby}. However,
there is a statistically significant difference between the durations of the
two broader categories, i.e., of \trans{\v{s}to} as an independent pronoun and
\trans{\v{s}to} as part of a complex conjunction
139942 tokens
20123 distinct stems
2589 diminutives
684 distinct possible diminutive stems
лиц 202
буфетчик 72
наконец 66
ник 66
лестниц 44
аннушк 41
улиц 34
милиц 33
пок 33
маленьк 31
def save_results(dictionary, filename='diminutive_stems.txt'):
    """Save the possible diminutive stems and their counts to a text file.

    Each entry of ``dictionary`` is written on its own line as the key,
    a single space, and the ``repr`` of its value, encoded as UTF-8.
    Entries are written in the mapping's own iteration order.

    Args:
        dictionary: Mapping of stem to frequency (presumably an nltk
            ``FreqDist`` -- confirm against the caller); any object
            exposing ``items()`` works.
        filename: Path of the file to create or overwrite. Defaults to
            ``'diminutive_stems.txt'`` in the current working directory,
            the name that was previously hard-coded.
    """
    # codecs.open (rather than the builtin open) so that unicode stems are
    # encoded as UTF-8 on write.
    with codecs.open(filename, "w", encoding="utf-8") as stream:
        # items() yields each stem together with its count, avoiding a
        # second lookup per key.
        for word, freq in dictionary.items():
            stream.write("%s %r\n" % (word, freq))
# Tally how often each entry of `diminutives` occurs; `diminutives` is
# defined outside this excerpt.
dim_fd = FreqDist(diminutives)
from nltk import FreqDist