This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
import sys | |
import re | |
import os | |
# Presumably the mapper step of the inverted-index job. Only the first lines
# of the function are present here, so what it emits cannot be documented.
# NOTE(review): the names `map` and `input` shadow Python builtins.
def map(input): | |
# Base name of the path stored in the "map_input_file" environment variable;
# raises KeyError if that variable is not set.
# NOTE(review): presumably set by Hadoop Streaming for the current input
# split -- confirm.
filename = os.path.basename(os.environ["map_input_file"]) | |
# Split the input text on any whitespace character, comma, or period.
# Adjacent separators produce empty strings in the resulting list.
# NOTE(review): the pattern is not a raw string, so "\s" is an invalid string
# escape that recent Python versions warn about; r'[\s,.]' would avoid that.
words = re.split('[\s,.]', input) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import sys | |
from collections import defaultdict | |
# Module-level mapping in which every missing key starts out as an empty set.
inverted_index = defaultdict(set) | |
# Presumably the reducer step; only the first line of the body is present here.
def reduce(kv): | |
# Expects `kv` to contain exactly one tab character: the two-name unpacking
# raises ValueError when there are no tabs or more than one.
# NOTE(review): a trailing newline in `kv`, if any, stays attached to
# `filename` -- confirm the caller strips it.
word, filename = kv.split('\t') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Launch a Hadoop Streaming job from the local hadoop-streaming-3.2.0.jar with
# inverted_index_mapper.py as the mapper and inverted_index_reducer.py as the
# reducer, writing to the output path `i`. The command continues past the last
# line shown here.
# NOTE(review): `-input` is followed by two paths; Hadoop Streaming documents
# one path per `-input` option (repeat the option for several inputs), so
# `testfile2.txt` may not be treated as an input -- confirm.
# NOTE(review): `-file` is documented as deprecated in favour of the generic
# `-files` option -- confirm against the Hadoop version in use.
hadoop jar ./hadoop-streaming-3.2.0.jar \ | |
-input testfile.txt testfile2.txt \ | |
-output i \ | |
-mapper 'inverted_index_mapper.py' \ | |
-reducer 'inverted_index_reducer.py' \ | |
-file inverted_index_mapper.py \ | |
-file inverted_index_reducer.py \ | |
-file testfile.txt \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#updated at 2020/1/18 | |
#!/usr/bin/python3 | |
import websocket | |
import threading | |
import traceback | |
from time import sleep | |
import json | |
import logging | |
import urllib |