Skip to content

Instantly share code, notes, and snippets.

View tori-takashi's full-sized avatar

toritakashi tori-takashi

  • Tokyo
View GitHub Profile
# Updated on 2020/1/18
#!/usr/bin/python3
import websocket
import threading
import traceback
from time import sleep
import json
import logging
import urllib
@tori-takashi
tori-takashi / invindex.sh
Created March 15, 2019 06:07
invindex.sh
#!/bin/bash
hadoop jar ./hadoop-streaming-3.2.0.jar \
-input testfile.txt testfile2.txt \
-output i \
-mapper 'inverted_index_mapper.py' \
-reducer 'inverted_index_reducer.py' \
-file inverted_index_mapper.py \
-file inverted_index_reducer.py \
-file testfile.txt \
@tori-takashi
tori-takashi / inverted_index_reducer.py
Created March 15, 2019 06:04
inverted_index_reducer.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
from collections import defaultdict
inverted_index = defaultdict(set)
def reduce(kv):
word, filename = kv.split('\t')
@tori-takashi
tori-takashi / inverted_index_mapper.py
Created March 15, 2019 06:02
inverted_index_mapper.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
import re
import os
def map(input):
filename = os.path.basename(os.environ["map_input_file"])
words = re.split('[\s,.]', input)