Created
June 13, 2017 17:27
-
-
Save rayansostenes/90e48d6b38d89a07d83e2a356de3211a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from collections import Counter, defaultdict | |
class TextSearch:
    """Word-frequency statistics for a text document downloaded from a URL.

    The document is fetched once, when the instance is created, split on
    whitespace and lower-cased. All query methods work on that snapshot.

    Attributes:
        url: The address the text was downloaded from.
        word_list: Every whitespace-separated token, lower-cased, in order.
        word_lenght: Total number of tokens (historical misspelling, kept so
            existing callers keep working).
        word_length: Correctly spelled alias of ``word_lenght``.
        word_count: ``Counter`` mapping each word to its occurrence count.
        count_word: Mapping from an occurrence count to the list of words
            that appear exactly that many times.
    """

    # Seconds to wait for the server; without a timeout requests can block
    # indefinitely on an unresponsive host.
    REQUEST_TIMEOUT = 30

    def __init__(self, url):
        """Download the text at *url* and build the frequency tables.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        self.url = url

        raw = self.get_text()
        # get_text() yields raw bytes. Decode before tokenizing: calling
        # str() on a bytes token produces its repr ("b'word'"), which would
        # corrupt every word in the tables.
        if isinstance(raw, bytes):
            text = raw.decode("utf-8", errors="replace")
        else:
            text = str(raw)

        self.word_list = [word.lower() for word in text.split()]
        self.word_lenght = len(self.word_list)
        self.word_length = self.word_lenght
        self.word_count = Counter(self.word_list)

        # Invert word -> count into count -> [words].
        self.count_word = defaultdict(list)
        for word, occurrences in self.word_count.items():
            self.count_word[occurrences].append(word)

    def get_text(self):
        """Fetch ``self.url`` and return the raw response body as bytes.

        Raises:
            requests.HTTPError: If the response status is 4xx or 5xx.
        """
        resp = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        resp.raise_for_status()
        return resp.content

    def most_common(self, number):
        """Return the *number* most frequent words as ``(word, count)`` pairs."""
        return self.word_count.most_common(number)

    def total_number_words(self):
        """Return the total number of words, duplicates included."""
        return len(self.word_list)

    def unique_word_count(self):
        """Return the number of distinct words."""
        # The Counter already holds exactly one key per distinct word.
        return len(self.word_count)

    def words_that_appear_n_times(self, number):
        """Return the words occurring exactly *number* times.

        Returns an empty list when no word has that count.
        """
        # .get() avoids defaultdict's insert-on-miss, so queries never add
        # empty entries to count_word.
        return self.count_word.get(number, [])
def main():
    """Download the sample text and print a short word-statistics report.

    Printed in order: the total word count, the number of distinct words,
    the five most common words, and the words that occur exactly once.
    """
    words = TextSearch('http://pastebin.com/raw/xkTYv7nf')

    # Each entry is (bound method, positional args); every query is run and
    # printed before the next one starts.
    report = (
        (words.total_number_words, ()),
        (words.unique_word_count, ()),
        (words.most_common, (5,)),
        (words.words_that_appear_n_times, (1,)),
    )
    for query, args in report:
        print(query(*args))
# Script entry point: run main() and hand its result to the shell as the
# process exit status, treating a falsy result (e.g. None) as success (0).
if __name__ == '__main__':
    import sys

    status = main()
    sys.exit(int(status) if status else 0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment