Created
November 23, 2015 10:53
-
-
Save tuttlem/a9cbd1cbf39ca8636955 to your computer and use it in GitHub Desktop.
Prime evaluation with Hadoop Streaming
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import itertools | |
from math import sqrt | |
def read_input(file):
    """Yield one list of integers for every non-blank line of *file*.

    Each line is expected to hold comma-separated integers, e.g. "3,5,8".

    Args:
        file: Any iterable of text lines (the mapper passes ``sys.stdin``).

    Yields:
        A list with the integers parsed from one input line, in order.
        Lines that contain no numbers at all are skipped.

    Raises:
        ValueError: If a non-empty field is not a valid integer literal.
    """
    for line in file:
        # strip() removes the trailing newline (and a stray carriage return
        # from CRLF input) as well as padding around each field.
        fields = [field.strip() for field in line.split(',')]

        # Empty fields come from blank lines or trailing commas; passing them
        # to int() would abort the whole mapper with a ValueError.
        numbers = [int(field) for field in fields if field]

        if numbers:
            yield numbers
def prime_candidate_seq():
    """Generate an endless, ascending stream of trial divisors.

    The single-digit primes 2, 3, 5 and 7 are produced first. After that,
    every number from 11 upwards whose last digit is 1, 3, 7 or 9 follows.
    Those later values are only candidates: composites such as 21 or 27 are
    part of the stream.
    """
    # seed primes below 10
    for seed in (2, 3, 5, 7):
        yield seed

    # above 9 a prime can only end in one of these digits
    last_digits = (1, 3, 7, 9)
    base = 10
    while True:
        for digit in last_digits:
            yield base + digit
        base += 10
def test_prime(n):
    """Return True when *n* is a prime number, False otherwise.

    The check is trial division by the values of ``prime_candidate_seq()``
    up to and including the square root of *n*.

    Args:
        n: The integer to test.

    Returns:
        bool: True if no candidate divisor divides *n* and *n* is at least 2.
    """
    # 0, 1 and negative numbers are not prime.
    if n < 2:
        return False

    # A composite n always has a divisor d with d * d <= n. The bound is
    # inclusive so that perfect squares (9, 25, 49, ...) are caught by their
    # own root, and it is computed in integers so that float rounding of
    # sqrt() cannot shift the cut-off for large n.
    divisors = itertools.takewhile(lambda d: d * d <= n, prime_candidate_seq())
    return not any(n % d == 0 for d in divisors)
def main():
    """Read candidate rows from standard input and print every prime found.

    Each input row is parsed by ``read_input``; every number in the row that
    passes ``test_prime`` is written to standard output on its own line.
    """
    for row in read_input(sys.stdin):
        for number in row:
            if test_prime(number):
                # print(x) with a single argument behaves identically on
                # Python 2 and Python 3, unlike the "print x" statement.
                print(number)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
def main():
    """Copy standard input to standard output, one line at a time.

    Every line read from STDIN is printed back with its newline removed
    (print adds exactly one newline again), so the input passes through
    unchanged.
    """
    # input comes from STDIN (standard input)
    for line in sys.stdin:
        # print(x) with a single argument behaves identically on Python 2
        # and Python 3, unlike the "print x" statement.
        print(line.replace('\n', ''))


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Submit the prime-evaluation job to Hadoop Streaming.
#
# candidates.txt needs to be uploaded to HDFS first. It holds all of the
# potential primes for processing.
#
# The mapper and reducer can be tested locally with bash:
#   cat candidates.txt | ./mapper.py | sort -k1,1 | ./reducer.py

# Stop on the first failing command and on any unset variable.
set -eu

# Fail with a clear message instead of silently trying to run "/bin/hadoop".
: "${HADOOP_PREFIX:?HADOOP_PREFIX must point at the Hadoop installation}"

# NOTE(review): the mapper and reducer are referenced by their absolute local
# path, which presumably only resolves when every task node has the scripts at
# that same path (e.g. a single-node setup) - confirm, or ship them with the
# -files generic option.
# All expansions are quoted so that paths containing spaces stay one argument.
"$HADOOP_PREFIX/bin/hadoop" jar \
    "$HADOOP_PREFIX/share/hadoop/tools/lib/hadoop-streaming-2.7.0.jar" \
    -mapper "$(pwd)/mapper.py" \
    -reducer "$(pwd)/reducer.py" \
    -input /user/root/candidates.txt \
    -output /user/root/candidates-out
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment