@linben
Created August 19, 2017 00:21
#!/usr/bin/env python
from __future__ import print_function

from operator import add
from pyspark import SparkContext

if __name__ == "__main__":
    sc = SparkContext(appName="WordCount")

    # Read the input file from HDFS as an RDD of lines.
    lines = sc.textFile("hdfs:///spark-test/constitution.txt")

    # Split lines into words, pair each word with 1, and sum the counts per word.
    counts = lines.flatMap(lambda x: x.split(' ')) \
                  .map(lambda x: (x, 1)) \
                  .reduceByKey(add)

    # Collect the results on the driver and print each word with its count.
    output = counts.collect()
    for (word, count) in output:
        print(str(word) + ": " + str(count))

    sc.stop()
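
One way to run this job on a cluster (assuming the script is saved as wordcount.py, a placeholder name, and that constitution.txt has already been uploaded to /spark-test on HDFS) is:

spark-submit wordcount.py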