Skip to content

Instantly share code, notes, and snippets.

@tori-takashi
Created March 15, 2019 06:02
Show Gist options
  • Save tori-takashi/a5431dde0df603dc260da8d2b35e0ac1 to your computer and use it in GitHub Desktop.
Save tori-takashi/a5431dde0df603dc260da8d2b35e0ac1 to your computer and use it in GitHub Desktop.
inverted_index_mapper.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
import re
import os
def map(input):
    """Turn one line of mapper input into ``(word, filename)`` pairs.

    The line is split on any whitespace character, comma, or period, and
    empty fragments are discarded. Every remaining word is paired with the
    base name of the current input file.

    The input file path is read from the ``map_input_file`` environment
    variable; if that is not set, ``mapreduce_map_input_file`` is used
    instead (the name newer Hadoop Streaming releases export -- see the
    Hadoop Streaming documentation).

    Args:
        input: A single line of text (a trailing newline is fine).

    Returns:
        A list of ``(word, filename)`` tuples, in the order the words
        appear in the line. Empty when the line holds no words.

    Raises:
        KeyError: If neither environment variable is set.
    """
    path = os.environ.get("map_input_file")
    if path is None:
        path = os.environ.get("mapreduce_map_input_file")
    if path is None:
        # Keep the original failure mode (KeyError on the legacy name) so
        # a misconfigured job still fails loudly instead of emitting junk.
        raise KeyError("map_input_file")
    filename = os.path.basename(path)
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    words = re.split(r'[\s,.]', input)
    return [(word, filename) for word in words if word]
def pipe(kv):
    """Print each ``(word, filename)`` pair as one tab-separated line.

    Args:
        kv: An iterable of two-item pairs; each pair is written to
            standard output as ``word<TAB>filename``.
    """
    line_template = '{0}\t{1}'
    for pair in kv:
        key, value = pair
        print(line_template.format(key, value))
if __name__ == "__main__":
    # Read standard input one line at a time, map each line to its
    # (word, filename) pairs, and print them immediately.
    for line in sys.stdin:
        pipe(map(line))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment