Skip to content

Instantly share code, notes, and snippets.

View menzenski's full-sized avatar

Matt Menzenski menzenski

View GitHub Profile
# possible endings of diminutive stems
diminutive_endings = (
# first degree of expressiveness
#u"к",
u"ик",
u"чик",
u"ок", #u"ек",
u"ец", u"иц",
u"енок", u"онок", u"еныш",
u"инк", u"инок",
# collect every stem that ends in one of the candidate diminutive suffixes;
# str.endswith checks all suffixes in a single call when handed a tuple
diminutives = [
    stem for stem in stemlist if stem.endswith(diminutive_endings)
]
from nltk import FreqDist
# build a frequency distribution over the collected diminutive candidates
dim_fd = FreqDist(diminutives)
def save_results(dictionary):
    """Save the list of possible diminutive stems in a text file.

    Writes one line per entry of ``dictionary`` to ``diminutive_stems.txt``
    in the current working directory, encoded as UTF-8 and formatted as
    ``<stem> <frequency>``. The file is opened in "w" mode, so an existing
    file of that name is overwritten.

    Args:
        dictionary: mapping whose keys are stems and whose values are the
            corresponding frequencies.

    Returns:
        None.
    """
    # fixed name of the file that receives the diminutive stems
    list_of_dim_stems = 'diminutive_stems.txt'
    # codecs.open encodes the text as UTF-8 while it is being written
    with codecs.open(list_of_dim_stems, "w", encoding="utf-8") as stream:
        # items() hands back each stem together with its frequency, so no
        # second lookup per key is needed
        for word, freq in dictionary.items():
            stream.write("%s %r\n" % (word, freq))
лиц 202
буфетчик 72
наконец 66
ник 66
лестниц 44
аннушк 41
улиц 34
милиц 33
пок 33
маленьк 31
139942 tokens
20123 distinct stems
2589 diminutives
684 distinct possible diminutive stems
\section{Discussion}
This small-scale analysis shows that there is not a significant difference in
the durations of the relative pronoun \trans{\v{s}to} and the interrogative
pronoun \trans{\v{s}to}, nor is there a significant difference in the durations
of the \trans{\v{s}to} of complex conjunctions such as \trans{potomu \v{s}to}
\engl{because} and the \trans{\v{s}to} of the hypothetical subordinator
\trans{\v{s}toby}. However, there is a statistically significant difference in
the durations of these two sorts of \trans{\v{s}to}, i.e., of \trans{\v{s}to}
as an independent pronoun and \trans{\v{s}to} as part of a complex conjunction
% \trans{...}: typeset a Russian transliteration with emphasis.
% \emph is used instead of the {\em ...} declaration so that LaTeX adds the
% italic correction after the emphasized text.
\newcommand{\trans}[1]{\emph{#1}} % RUSSIAN TRANSLITERATION
% \engl{...}: typeset an English translation between single quotes.
\newcommand{\engl}[1]{`#1'} % ENGLISH TRANSLATION
@menzenski
menzenski / digitalhumanitiesproject.py
Last active December 25, 2015 20:39
Find specific diminutive nouns in a Russian text.
#! /usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import nltk
from nltk.stem import SnowballStemmer
from nltk import FreqDist
import codecs
import glob
from diminutivefinder_04_asclass import DiminutiveFinder