# JSen hujinsen

## recsys-pyspark.py
# Get the data here: http://grouplens.org/datasets/movielens/
# NOTE(review): `sc` is not defined in this file; presumably it is the
# SparkContext that the pyspark shell provides -- confirm before running
# this as a standalone script.
movielens = sc.textFile("../in/ml-100k/u.data")

# Quick sanity checks, meant for interactive use where the values are echoed.
movielens.first()  # u'196\t242\t3\t881250949'
movielens.count()  # 100000

# Clean up the data by splitting it.
# The MovieLens readme says each record is tab-separated, with fields in the
# order: user, product, rating, timestamp.
clean_data = movielens.map(lambda line: line.split('\t'))