lvsl-deactivated/frequent_terms.py

## frequent_terms.py
# coding: utf-8

'''
Frequency Counting of Words / Top N words in a document.

Given N terms, your task is to find the k most frequent terms from given N terms.

Input format :

First line of input contains N, denoting the number of terms to add.
In each of the next N lines, each contains a term.
Next line contains k, most frequent terms.

Output format :

Print the k most frequent terms in descending order of their frequency. If two terms have same frequency print them in lexicographical order.

Sample input :

14
Fee
Fi
Fo
Fum
Fee
Fo
Fee
Fee
Fo
Fi
Fi
Fo
Fum
Fee
3

Sample output :

Fee
Fo
Fi

Constraint :
0 < N < 300000
0 < term length < 20.
'''


def get_most_frequent(terms, k):
    d = {}
    for t in terms:
        if t in d:
            d[t] += 1
        else:
            d[t] = 1

    counted = d.items()
    counted.sort(key=lambda x: x[0])
    counted.sort(key=lambda x: -x[1])
    return [t for t,c in counted[:k]]


def main():
    n = int(raw_input())
    terms = []
    for i in xrange(n):
        terms.append(raw_input().strip())
    k = int(raw_input())

    for term in get_most_frequent(terms, k):
        print term


if __name__ == "__main__":
    main()
	# coding: utf-8

	'''
	Frequency Counting of Words / Top N words in a document.

	Given N terms, your task is to find the k most frequent terms from given N terms.

	Input format :

	First line of input contains N, denoting the number of terms to add.
	In each of the next N lines, each contains a term.
	Next line contains k, most frequent terms.

	Output format :

	Print the k most frequent terms in descending order of their frequency. If two terms have same frequency print them in lexicographical order.

	Sample input :

	14
	Fee
	Fi
	Fo
	Fum
	Fee
	Fo
	Fee
	Fee
	Fo
	Fi
	Fi
	Fo
	Fum
	Fee
	3

	Sample output :

	Fee
	Fo
	Fi

	Constraint :
	0 < N < 300000
	0 < term length < 20.
	'''


	def get_most_frequent(terms, k):
	d = {}
	for t in terms:
	if t in d:
	d[t] += 1
	else:
	d[t] = 1

	counted = d.items()
	counted.sort(key=lambda x: x[0])
	counted.sort(key=lambda x: -x[1])
	return [t for t,c in counted[:k]]


	def main():
	n = int(raw_input())
	terms = []
	for i in xrange(n):
	terms.append(raw_input().strip())
	k = int(raw_input())

	for term in get_most_frequent(terms, k):
	print term


	if __name__ == "__main__":
	main()