Skip to content

Instantly share code, notes, and snippets.

@rayansostenes
Created June 13, 2017 17:27
Show Gist options
  • Save rayansostenes/90e48d6b38d89a07d83e2a356de3211a to your computer and use it in GitHub Desktop.
Save rayansostenes/90e48d6b38d89a07d83e2a356de3211a to your computer and use it in GitHub Desktop.
import requests
from collections import Counter, defaultdict
class TextSearch:
    """Word-frequency statistics for the text document found at a URL.

    The document is downloaded once, at construction time, split on
    whitespace and lower-cased; every query method answers from that
    in-memory snapshot.

    Attributes:
        url: Address the text is fetched from.
        word_list: Every lower-cased word, in document order.
        word_lenght: Total number of words (the misspelled name is kept so
            existing callers keep working).
        word_count: ``Counter`` mapping each word to its occurrence count.
        count_word: Maps an occurrence count to the list of words that
            appear exactly that many times.
    """

    def __init__(self, url):
        """Fetch *url* and build the word indexes.

        Any exception raised by :meth:`get_text` propagates to the caller.
        """
        self.url = url
        text = self.get_text()
        # get_text() returns the raw response body as bytes. Calling str()
        # on a bytes token in Python 3 produces its repr ("b'word'"), which
        # corrupts every word, so decode once here instead. UTF-8 is
        # assumed; undecodable bytes become U+FFFD rather than aborting.
        if isinstance(text, bytes):
            text = text.decode('utf-8', 'replace')
        self.word_list = [word.lower() for word in text.split()]
        self.word_lenght = len(self.word_list)
        self.word_count = Counter(self.word_list)
        # Invert the counter: occurrence count -> words with that count.
        self.count_word = defaultdict(list)
        for word, occurrences in self.word_count.items():
            self.count_word[occurrences].append(word)

    def get_text(self):
        """Download ``self.url`` and return the raw response body.

        Raises:
            requests.HTTPError: If the response carries an error status.
        """
        resp = requests.get(self.url)
        resp.raise_for_status()
        return resp.content

    def most_common(self, number):
        """Return the *number* most frequent words as ``(word, count)`` pairs."""
        return self.word_count.most_common(number)

    def total_number_words(self):
        """Return how many words the document contains, duplicates included."""
        return len(self.word_list)

    def unique_word_count(self):
        """Return how many distinct words the document contains."""
        return len(set(self.word_list))

    def words_that_appear_n_times(self, number):
        """Return the words that occur exactly *number* times.

        An empty list is returned when no word has that count.
        """
        # .get() instead of indexing: indexing a defaultdict would insert
        # an empty entry for every count that is merely queried.
        return self.count_word.get(number, [])
def main():
    """Print word statistics for the sample pastebin document.

    Emits, one per line: the total word count, the number of distinct
    words, the five most frequent words with their counts, and the list of
    words that occur exactly once.
    """
    search = TextSearch('http://pastebin.com/raw/xkTYv7nf')

    total = search.total_number_words()
    print(total)

    distinct = search.unique_word_count()
    print(distinct)

    top_five = search.most_common(5)
    print(top_five)

    singletons = search.words_that_appear_n_times(1)
    print(singletons)
if __name__ == '__main__':
    import sys

    # main() may return None; treat any falsy result as a successful exit.
    exit_status = main() or 0
    sys.exit(int(exit_status))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment