Skip to content

Instantly share code, notes, and snippets.

@danaabs
Created March 25, 2016 14:02
Show Gist options
  • Save danaabs/751e5d0436327f397379 to your computer and use it in GitHub Desktop.
Save danaabs/751e5d0436327f397379 to your computer and use it in GitHub Desktop.
RWETMidterm_Frequency Analysis
from __future__ import division
from random import shuffle
import sys
import string # to find punctuation
import re
import pprint
import random
# Word-frequency "background" generator.
#
# Reads text from stdin, counts every word (punctuation removed), prints the
# ratio of once-only words to total words, then prints a roughly 1000-item
# sequence built from the most frequent word(s) and '+' fillers, with one
# randomly chosen once-only word (upper-cased) inserted near the middle.

# Characters deleted from each line before it is split into words.
# NOTE: tabs and carriage returns inside a line are deleted, not treated as
# separators, so "a\tb" is counted as the single word "ab".
_STRIP_CHARS = frozenset(string.punctuation + "\n\r\t")

# Nominal number of items in the generated background sequence.
_BACKGROUND_SIZE = 1000

# Item used for the share of the background that represents once-only words.
_FILLER = '+'

# Separator used when the background is rendered as one string.
# NOTE(review): the original joined on an undefined name `s`; a single space
# is assumed here -- confirm the intended separator.
_SEPARATOR = ' '


def count_words(lines):
    """Return a dict mapping each word in *lines* to its occurrence count.

    Each line is stripped, every character in ``_STRIP_CHARS`` is deleted,
    and the remainder is split on whitespace. Counting is case-sensitive.
    """
    counts = {}
    for line in lines:
        cleaned = "".join(ch for ch in line.strip() if ch not in _STRIP_CHARS)
        for word in cleaned.split():
            counts[word] = counts.get(word, 0) + 1
    return counts


def unique_ratio(counts):
    """Return (words occurring exactly once) / (total word occurrences).

    Returns 0.0 for an empty *counts* instead of dividing by zero.
    """
    total = sum(counts.values())
    if not total:
        return 0.0
    singles = sum(1 for n in counts.values() if n == 1)
    # float() keeps this a true division even without the __future__ import.
    return float(singles) / total


def build_background(counts, ratio, rng=random):
    """Build the background list for *counts* given the unique-word *ratio*.

    Layout: most frequent word(s), then '+' fillers, then the most frequent
    word(s) again, sized proportionally to ``1 - ratio`` and ``ratio``.
    One randomly chosen once-only word (upper-cased) is inserted at the
    computed midpoint when such a word exists.

    *rng* must provide ``choice``; it defaults to the ``random`` module.
    Returns an empty list when *counts* is empty.
    """
    if not counts:
        return []

    # Compute the maximum once rather than once per key.
    top = max(counts.values())
    most_common = [word for word in counts if counts[word] == top]
    once_only = [word.upper() for word in counts if counts[word] == 1]

    filler_len = _BACKGROUND_SIZE * ratio
    common_len = _BACKGROUND_SIZE * (1 - ratio)

    # When several words tie for most frequent, the whole group is repeated.
    half = most_common * int(common_len / 2)
    background = half + [_FILLER] * int(filler_len) + half

    if once_only:
        # choice() yields the word itself; sample(..., 1) yielded a
        # one-element list that was rendered as "['WORD']".
        background.insert(int(filler_len / 2 + common_len / 2),
                          rng.choice(once_only))
    return background


def format_background(background):
    """Return *background* joined into one string, centred in 50 columns."""
    return _SEPARATOR.join(background).center(50)


def main(stream=None):
    """Read text from *stream* (default: stdin) and print ratio and background."""
    if stream is None:
        stream = sys.stdin
    counts = count_words(stream)
    ratio = unique_ratio(counts)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(ratio)
    print(format_background(build_background(counts, ratio)))


if __name__ == "__main__":
    main()
#______________________________________________________
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment