# karanparikh/challenge.py

## challenge.py
from sys import argv
from os import listdir
from re import split
from collections import Counter

class Post(object):
  """A single post: a title plus the post's words in lower case.

  Attributes:
    title: The value passed as ``title``, stored unchanged.
    content: List of the given words, each converted with ``str.lower``.
  """

  def __init__(self, title, content):
    """Store ``title`` and a lower-cased list built from ``content``.

    Args:
      title: Identifier of the post.
      content: Iterable of strings (the words of the post).
    """
    self.title = title
    # Build a real list rather than calling map(): on Python 3 map() returns
    # a one-shot iterator, which has no len() and cannot be iterated twice.
    self.content = [word.lower() for word in content]

def process_posts(post_dir):
  """Print word statistics for the posts stored in ``post_dir``.

  Every regular file directly inside ``post_dir`` is read as one post and
  split into words (maximal runs of word characters).  The report written to
  standard output lists up to three shortest and three longest posts, the
  average post length, the total and unique word counts, and the 100 most
  frequent words.

  Args:
    post_dir: Path of the directory holding one file per post.

  Raises:
    OSError/IOError: Propagated from ``listdir`` or ``open`` when the
      directory or one of its files cannot be read.
  """
  # Function-scope import so the block does not rely on new file-level imports.
  from os.path import isfile, join

  posts = []
  # Sorted so that posts of equal length are always reported in the same order
  # (listdir returns entries in arbitrary order).
  for post_file in sorted(listdir(post_dir)):
    fullname = join(post_dir, post_file)
    if not isfile(fullname):
      continue  # sub-directories are not posts and cannot be open()ed
    with open(fullname) as pf:
      # split() yields "" when the text starts or ends with a separator
      # (for instance a trailing newline); those are not words, so drop them.
      words = [w for w in split(r"\W+", pf.read()) if w]
    posts.append(Post(post_file, words))

  posts.sort(key=lambda p: len(p.content))

  # Single-argument print(...) prints the same text under Python 2's print
  # statement and Python 3's print function.
  # Slices instead of fixed indices: fewer than three posts must not raise.
  print("The shortest posts are: ")
  for p in posts[:3]:
    print("{} with {} words".format(p.title, len(p.content)))
  print("The longest posts are: ")
  for p in posts[-3:][::-1]:  # longest first
    print("{} with {} words".format(p.title, len(p.content)))

  word_counter = Counter()
  for p in posts:
    word_counter.update(p.content)

  # Integer total so the word count is not printed as "N.0"; convert to float
  # only for the average (Python 2 would otherwise truncate the division).
  total_post_length = sum(len(p.content) for p in posts)
  average_length = float(total_post_length) / len(posts) if posts else 0.0

  print("The average post length is {} words".format(average_length))
  print("The number of words written is {}".format(total_post_length))
  # The Counter's keys are exactly the distinct words; no separate set needed.
  print("The number of unique words used is {}".format(len(word_counter)))
  print("The 100 most used words are {}".format(word_counter.most_common(100)))

if __name__ == "__main__":
  # The first command-line argument is the directory that holds the posts.
  # Exit with a usage message instead of an IndexError traceback when it is
  # missing; any extra arguments are ignored, as before.
  if len(argv) < 2:
    raise SystemExit("usage: {} POST_DIR".format(argv[0]))
  process_posts(argv[1])
	from sys import argv
	from os import listdir
	from re import split
	from collections import Counter

	class Post(object):
	  """A single post: a title plus the post's words in lower case.

	  Attributes:
	    title: The value passed as ``title``, stored unchanged.
	    content: List of the given words, each converted with ``str.lower``.
	  """

	  def __init__(self, title, content):
	    """Store ``title`` and a lower-cased list built from ``content``.

	    Args:
	      title: Identifier of the post.
	      content: Iterable of strings (the words of the post).
	    """
	    self.title = title
	    # Build a real list rather than calling map(): on Python 3 map() returns
	    # a one-shot iterator, which has no len() and cannot be iterated twice.
	    self.content = [word.lower() for word in content]

	def process_posts(post_dir):
	  """Print word statistics for the posts stored in ``post_dir``.

	  Every regular file directly inside ``post_dir`` is read as one post and
	  split into words (maximal runs of word characters).  The report written to
	  standard output lists up to three shortest and three longest posts, the
	  average post length, the total and unique word counts, and the 100 most
	  frequent words.

	  Args:
	    post_dir: Path of the directory holding one file per post.

	  Raises:
	    OSError/IOError: Propagated from ``listdir`` or ``open`` when the
	      directory or one of its files cannot be read.
	  """
	  # Function-scope import so the block does not rely on new file-level imports.
	  from os.path import isfile, join

	  posts = []
	  # Sorted so that posts of equal length are always reported in the same order
	  # (listdir returns entries in arbitrary order).
	  for post_file in sorted(listdir(post_dir)):
	    fullname = join(post_dir, post_file)
	    if not isfile(fullname):
	      continue  # sub-directories are not posts and cannot be open()ed
	    with open(fullname) as pf:
	      # split() yields "" when the text starts or ends with a separator
	      # (for instance a trailing newline); those are not words, so drop them.
	      words = [w for w in split(r"\W+", pf.read()) if w]
	    posts.append(Post(post_file, words))

	  posts.sort(key=lambda p: len(p.content))

	  # Single-argument print(...) prints the same text under Python 2's print
	  # statement and Python 3's print function.
	  # Slices instead of fixed indices: fewer than three posts must not raise.
	  print("The shortest posts are: ")
	  for p in posts[:3]:
	    print("{} with {} words".format(p.title, len(p.content)))
	  print("The longest posts are: ")
	  for p in posts[-3:][::-1]:  # longest first
	    print("{} with {} words".format(p.title, len(p.content)))

	  word_counter = Counter()
	  for p in posts:
	    word_counter.update(p.content)

	  # Integer total so the word count is not printed as "N.0"; convert to float
	  # only for the average (Python 2 would otherwise truncate the division).
	  total_post_length = sum(len(p.content) for p in posts)
	  average_length = float(total_post_length) / len(posts) if posts else 0.0

	  print("The average post length is {} words".format(average_length))
	  print("The number of words written is {}".format(total_post_length))
	  # The Counter's keys are exactly the distinct words; no separate set needed.
	  print("The number of unique words used is {}".format(len(word_counter)))
	  print("The 100 most used words are {}".format(word_counter.most_common(100)))

	if __name__ == "__main__":
	  # The first command-line argument is the directory that holds the posts.
	  # Exit with a usage message instead of an IndexError traceback when it is
	  # missing; any extra arguments are ignored, as before.
	  if len(argv) < 2:
	    raise SystemExit("usage: {} POST_DIR".format(argv[0]))
	  process_posts(argv[1])