Created
March 25, 2016 14:02
-
-
Save danaabs/751e5d0436327f397379 to your computer and use it in GitHub Desktop.
RWETMidterm_Frequency Analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division

import pprint
import random
import re
import string  # to find punctuation
import sys
from collections import Counter
from random import shuffle
# Frequency-analysis text generator.
#
# Reads a source text from stdin, counts how often each word occurs, prints
# the fraction of all word occurrences that belong to words used exactly
# once, and then prints a "background" line: the most frequent word(s)
# repeated on both sides of a run of '+' filler, with one randomly chosen
# once-only word (upper-cased) inserted into it.

# Characters deleted from every line before it is split into words: all ASCII
# punctuation plus newline, carriage return and tab.  Deleting (rather than
# replacing with a space) matches the original script, so tab-separated
# words are merged into one.
_STRIP_PATTERN = re.compile('[%s\n\r\t]' % re.escape(string.punctuation))

# Nominal number of tokens in the background before the unique word is added.
BACKGROUND_SIZE = 1000
# Token used for the share of the background that represents once-only words.
FILLER = '+'
# The original script joined the background with an undefined name `s`, which
# raised NameError.  A single space is assumed here -- TODO confirm the
# separator the author intended.
SEPARATOR = ' '


def count_words(lines):
    """Return a Counter mapping each word found in *lines* to its frequency.

    Each line is stripped, has punctuation and control whitespace deleted,
    and is then split on whitespace.  Counting is case-sensitive.
    """
    counts = Counter()
    for line in lines:
        counts.update(_STRIP_PATTERN.sub('', line.strip()).split())
    return counts


def unique_ratio(counts):
    """Return (number of words occurring once) / (total word occurrences).

    Returns 0.0 for an empty mapping instead of dividing by zero.
    """
    total_words = sum(counts.values())
    if not total_words:
        return 0.0
    once_only = sum(1 for n in counts.values() if n == 1)
    # Explicit float so the result does not depend on `__future__.division`.
    return once_only / float(total_words)


def build_background(counts, unique_word=None, size=BACKGROUND_SIZE):
    """Return the list of tokens that makes up the background line.

    The list is: most frequent word(s) * k, FILLER * m, most frequent
    word(s) * k, where m is `size` scaled by the unique ratio and k is half of
    the remainder.  If several words tie for most frequent, all of them are
    repeated, so the list can be longer than `size`.  When *unique_word* is
    not None it is inserted at index int(size / 2) (computed as in the
    original script).  An empty *counts* yields an empty list.
    """
    if not counts:
        return []
    ratio = unique_ratio(counts)
    top_count = max(counts.values())  # computed once, not once per key
    most_common = [word for word in counts if counts[word] == top_count]

    filler_len = size * ratio
    word_len = size * (1 - ratio)
    half = most_common * int(word_len / 2)
    background = half + [FILLER] * int(filler_len) + half
    if unique_word is not None:
        background.insert(int(filler_len / 2 + word_len / 2), unique_word)
    return background


def main(lines=None):
    """Run the analysis on *lines* (default: sys.stdin) and print the result."""
    counts = count_words(sys.stdin if lines is None else lines)

    # Words that occur exactly once, upper-cased for emphasis in the output.
    once_only_words = [word.upper() for word in counts if counts[word] == 1]
    # random.choice yields the word itself; the original's random.sample(...)
    # produced a one-element list that was rendered as "['WORD']".  The text
    # may contain no once-only word at all, in which case nothing is inserted.
    unique_word = random.choice(once_only_words) if once_only_words else None

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(unique_ratio(counts))
    background = build_background(counts, unique_word)
    print(SEPARATOR.join(background).center(50))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment