Skip to content

Instantly share code, notes, and snippets.

@jrx
Created July 13, 2017 13:32
Show Gist options
  • Save jrx/aa4c82070568ec7802dd021feb0bdf74 to your computer and use it in GitHub Desktop.
Save jrx/aa4c82070568ec7802dd021feb0bdf74 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from operator import add
from pyspark import SparkContext
def split_words(line):
    """Split one line of text into words on runs of whitespace.

    ``str.split()`` with no separator is used rather than ``split(' ')`` so
    that consecutive spaces and blank lines do not produce empty-string
    tokens, which would otherwise be counted as a "word".
    """
    return line.split()


def format_count(word, count):
    """Return the ``"<word>: <count>"`` text printed for one result pair."""
    # %-formatting avoids the explicit str(word) conversion, which fails on
    # non-ASCII words under Python 2, and behaves the same on Python 3.
    return "%s: %s" % (word, count)


def main():
    """Count word occurrences in the input text file and print each total."""
    sc = SparkContext(appName="WordCount")
    try:
        lines = sc.textFile("hdfs:///spark-test/constitution.txt")
        counts = (
            lines.flatMap(split_words)
            .map(lambda word: (word, 1))
            .reduceByKey(add)
        )
        # collect() brings every (word, count) pair back to the driver.
        for word, count in counts.collect():
            # Single-argument print(...) is valid on both Python 2 and 3.
            print(format_count(word, count))
    finally:
        # Always release the Spark context, even if a job above failed.
        sc.stop()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment