Matt Menzenski menzenski

## DimFinderClass.py
#! /usr/bin/env python
# -*- coding: utf-8 -*-

import nltk
from nltk.stem import SnowballStemmer
from nltk import FreqDist
import codecs

def print_list(mylist):
    '''Print a list containing unicode characters.'''

## yandexsearcher.py
#! /usr/bin/env python
# -*- coding: utf-8 -*-

import nltk
from nltk.stem import SnowballStemmer
from nltk import FreqDist
import codecs
import glob
from diminutivefinder_04_asclass import DiminutiveFinder
import matplotlib.pyplot as plt

## digitalhumanitiesproject.py
#! /usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
import nltk
from nltk.stem import SnowballStemmer
from nltk import FreqDist
import codecs
import glob
from diminutivefinder_04_asclass import DiminutiveFinder

## latex_sample_02.tex
\newcommand{\trans}[1]{{\em #1}}    % RUSSIAN TRANSLITERATION
\newcommand{\engl}[1]{`#1'}         % ENGLISH TRANSLATION

## latex_sample_01.tex
\section{Discussion}

This small-scale analysis shows that there is not a significant difference in
the durations of the relative pronoun \trans{\v{s}to} and the interrogative
pronoun \trans{\v{s}to}, nor is there a significant difference in the durations
of the \trans{\v{s}to} of complex conjunctions such as  \trans{potomu \v{s}to}
\engl{because} and the \trans{\v{s}to} of the hypothetical subordinator
\trans{\v{s}toby}. However, there is a statistically significant difference in
the durations of these two sorts of \trans{\v{s}to}, i.e., of \trans{\v{s}to}
as an independent pronoun and \trans{\v{s}to} as part of a complex conjunction

## dimfinder_results.txt
139942 tokens
20123 distinct stems
2589 diminutives
684 distinct possible diminutive stems

## high_frequency_diminutives.txt
лиц 202
буфетчик 72
наконец 66
ник 66
лестниц 44
аннушк 41
улиц 34
милиц 33
пок 33
маленьк 31

## python_housekeeping.py
def save_results(dictionary, filename='diminutive_stems.txt'):
    """Write each stem and its frequency to a UTF-8 text file.

    One line is written per entry of ``dictionary``, in the form
    ``"<stem> <frequency>"``; the frequency is formatted with ``%r``.

    Args:
        dictionary: mapping whose keys are the stems to save and whose
            values are the counts written next to them.
        filename: path of the output file.  It is opened in "w" mode, so
            any existing content is overwritten.  Defaults to
            'diminutive_stems.txt', resolved against the current working
            directory.
    """
    # codecs.open encodes on write, so non-ASCII stems are stored as UTF-8.
    with codecs.open(filename, "w", encoding="utf-8") as stream:
        # items() yields key and value together, avoiding a second lookup.
        for word, freq in dictionary.items():
            stream.write("%s %r\n" % (word, freq))

## diminutive_frequency.py
# Build an NLTK frequency distribution over the items in `diminutives`.
# NOTE(review): `diminutives` is defined outside this excerpt -- presumably
# the collection of diminutive stems found in the text; confirm at the caller.
dim_fd = FreqDist(diminutives)

## import_freqdist.py
from nltk import FreqDist
	#! /usr/bin/env python
	# -*- coding: utf-8 -*-

	import nltk
	from nltk.stem import SnowballStemmer
	from nltk import FreqDist
	import codecs

	def print_list(mylist):
	'''Print a list containing unicode characters.'''
	#! /usr/bin/env python
	# -*- coding: utf-8 -*-

	from __future__ import division
	import nltk
	from nltk.stem import SnowballStemmer
	from nltk import FreqDist
	import codecs
	import glob
	from diminutivefinder_04_asclass import DiminutiveFinder
	\newcommand{\trans}[1]{{\em #1}} % RUSSIAN TRANSLITERATION
	\newcommand{\engl}[1]{`#1'} % ENGLISH TRANSLATION
	\section{Discussion}

	This small-scale analysis shows that there is not a significant difference in
	the durations of the relative pronoun \trans{\v{s}to} and the interrogative
	pronoun \trans{\v{s}to}, nor is there a significant difference in the durations
	of the \trans{\v{s}to} of complex conjunctions such as \trans{potomu \v{s}to}
	\engl{because} and the \trans{\v{s}to} of the hypothetical subordinator
	\trans{\v{s}toby}. However, there is a statistically significant difference in
	the durations of these two sorts of \trans{\v{s}to}, i.e., of \trans{\v{s}to}
	as an independent pronoun and \trans{\v{s}to} as part of a complex conjunction
	139942 tokens
	20123 distinct stems
	2589 diminutives
	684 distinct possible diminutive stems
	лиц 202
	буфетчик 72
	наконец 66
	ник 66
	лестниц 44
	аннушк 41
	улиц 34
	милиц 33
	пок 33
	маленьк 31
	def save_results(dictionary, filename='diminutive_stems.txt'):
	    """Write each stem and its frequency to a UTF-8 text file.

	    One line is written per entry of ``dictionary``, in the form
	    ``"<stem> <frequency>"``; the frequency is formatted with ``%r``.

	    Args:
	        dictionary: mapping whose keys are the stems to save and whose
	            values are the counts written next to them.
	        filename: path of the output file.  It is opened in "w" mode, so
	            any existing content is overwritten.  Defaults to
	            'diminutive_stems.txt', resolved against the current working
	            directory.
	    """
	    # codecs.open encodes on write, so non-ASCII stems are stored as UTF-8.
	    with codecs.open(filename, "w", encoding="utf-8") as stream:
	        # items() yields key and value together, avoiding a second lookup.
	        for word, freq in dictionary.items():
	            stream.write("%s %r\n" % (word, freq))