Skip to content

Instantly share code, notes, and snippets.

@bnroths
Created October 9, 2016 20:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bnroths/0c22f2ea325dbe3b9a6aba6971479561 to your computer and use it in GitHub Desktop.
Save bnroths/0c22f2ea325dbe3b9a6aba6971479561 to your computer and use it in GitHub Desktop.
from mrjob.job import MRJob, MRStep
import os
import sys
class MRMatrixMultiply(MRJob):
    """One-step MapReduce job that multiplies two matrices, ``M x N``.

    Each input line is ``index<TAB>index<TAB>value`` with integer fields:
    ``i, j, value`` for an entry of M and ``j, k, value`` for an entry of N.
    Which matrix a line belongs to, and how many times it is replicated, are
    both derived from the name of the input file, which is expected to look
    like ``M_200_200.txt`` / ``N_200_200.txt``.

    NOTE: the replication count is taken from the *current* file's name and
    indices are generated as ``1..count``, so results are only complete when
    both matrices are square, equal in size and 1-indexed.
    """

    def steps(self):
        """Return the single map/reduce step that makes up the job."""
        return [MRStep(mapper=self.mapper_1, reducer=self.reducer_1)]

    @staticmethod
    def _input_path():
        """Return the path of the file the current mapper is reading.

        The legacy ``map_input_file`` name is checked first so existing
        setups behave as before; ``mapreduce_map_input_file`` is the name
        used by Hadoop 2+ streaming.

        Raises:
            KeyError: If neither environment variable is set.
        """
        for name in ('map_input_file', 'mapreduce_map_input_file'):
            path = os.environ.get(name)
            if path:
                return path
        raise KeyError('map_input_file')

    def mapper_1(self, _, line):
        """Replicate one matrix entry to every output cell it contributes to.

        Args:
            _: Unused input key.
            line: One ``index<TAB>index<TAB>value`` record of integers.

        Yields:
            ``((i, k), (tag, j, value))`` pairs, where ``(i, k)`` is a cell of
            the product, ``tag`` is ``'M'`` or ``'N'`` and ``j`` is the inner
            index on which the reducer pairs the two entries.

        Raises:
            KeyError: If the input file name is not in the environment.
            ValueError: If a field of a non-blank line is not an integer.
        """
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline) in the input.
            return

        vals = [int(x) for x in line.split('\t')]

        # File format is e.g. N_200_200.txt, where 200 is the number of
        # rows/columns.  Only the basename is inspected so that an 'M' or 'N'
        # elsewhere in the path cannot select the wrong branch.
        basename = self._input_path().split('/')[-1]
        num_rows = int(basename.split('_')[1])

        if 'M' in basename:
            i, j, val = vals[0], vals[1], vals[2]
            # M[i][j] is needed by every cell (i, k) of row i of the product.
            for k in range(1, num_rows + 1):
                yield (i, k), ('M', j, val)
        elif 'N' in basename:
            j, k, val = vals[0], vals[1], vals[2]
            # N[j][k] is needed by every cell (i, k) of column k.
            for i in range(1, num_rows + 1):
                yield (i, k), ('N', j, val)

    def reducer_1(self, key, values):
        """Compute one cell of the product as a dot product over ``j``.

        Args:
            key: The ``(i, k)`` cell emitted by the mapper.
            values: Iterable of ``(tag, j, value)`` entries for that cell.

        Yields:
            A single ``(key, total)`` pair.
        """
        m_row = {}
        n_col = {}
        for matrix, j, val in values:
            if matrix == 'M':
                m_row[j] = val
            elif matrix == 'N':
                n_col[j] = val

        # Only indices present on both sides contribute; an absent entry is
        # treated as zero so sparse inputs do not raise KeyError.
        total = sum(val * n_col[j] for j, val in m_row.items() if j in n_col)
        yield key, total
if __name__ == '__main__':
    # Script entry point: hand control to mrjob's command-line runner.
    MRMatrixMultiply.run()
# python mm_2.py m/M_10_10.txt m/N_10_10.txt -r dataproc --cluster-id mrjob-us-central1-c-062a0e05efa33342
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment