# arnesund/hashtag_word_count.py

## hashtag_word_count.py
# Count the number of occurrences of each hashtag.
# Every tweet contributes the 'text' of each entry found under
# tweet['entities']['hashtags'], lowercased so that differently-cased
# spellings of the same tag are counted together. The flattened tags are
# then counted with a standard word count: map to (tag, 1) pairs and sum
# the ones per tag with reduceByKey.
countsRDD = (filteredTweetsRDD
             .flatMap(lambda tweet: [hashtag['text'].lower()
                                     for hashtag in tweet['entities']['hashtags']])
             .map(lambda tag: (tag, 1))
             .reduceByKey(lambda a, b: a + b)
            )

# Get the 20 most used hashtags. The count is negated in the key function
# so that the ordering is descending by count.
# The key function receives each (tag, count) pair as a single argument and
# indexes it: tuple-unpacking lambda parameters such as
# `lambda (key, value): ...` are a SyntaxError on Python 3 (PEP 3113).
countsRDD.takeOrdered(20, lambda pair: -pair[1])
	# NOTE(review): this tab-indented block repeats the top-level statements
	# that precede it, and no enclosing definition header is visible here --
	# confirm whether it belongs inside a function or is a paste artifact.
	#
	# Count the number of occurrences of each hashtag.
	# Every tweet contributes the 'text' of each entry found under
	# tweet['entities']['hashtags'], lowercased so that differently-cased
	# spellings of the same tag are counted together. The flattened tags are
	# then counted with a standard word count: map to (tag, 1) pairs and sum
	# the ones per tag with reduceByKey.
	countsRDD = (filteredTweetsRDD
	             .flatMap(lambda tweet: [hashtag['text'].lower()
	                                     for hashtag in tweet['entities']['hashtags']])
	             .map(lambda tag: (tag, 1))
	             .reduceByKey(lambda a, b: a + b)
	            )

	# Get the 20 most used hashtags. The count is negated in the key function
	# so that the ordering is descending by count.
	# The key function receives each (tag, count) pair as a single argument
	# and indexes it: tuple-unpacking lambda parameters such as
	# `lambda (key, value): ...` are a SyntaxError on Python 3 (PEP 3113).
	countsRDD.takeOrdered(20, lambda pair: -pair[1])