Skip to content

Instantly share code, notes, and snippets.

# Load the data file as an RDD of text lines.
# NOTE(review): ``sc`` is not defined in this script -- presumably the
# SparkContext supplied by the PySpark shell; confirm before running standalone.
# A raw string keeps the backslashes of the Windows path literal.
textfile = sc.textFile(r"C:\Users\Padraic.Wade\Desktop\dataA.csv")

# The first line of the file is treated as the column header.
header = textfile.first()


def _key_and_max(line):
    """Return ``(first field, largest remaining field as a float)``.

    The line is split on tab characters exactly once. Raises
    ``ValueError`` if a remaining field cannot be parsed as a float or
    if there are no fields after the first one.
    """
    fields = line.split('\t')
    return fields[0], max(float(x) for x in fields[1:])


# Drop every line equal to the header, then reduce each remaining row to
# a (key, row maximum) pair.
# NOTE(review): the file is named .csv but rows are split on tabs --
# confirm the delimiter against the actual data.
somethingRDD = (
    textfile
    .filter(lambda line: line != header)
    .map(_key_and_max)
)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(somethingRDD.collect())