Skip to content

Instantly share code, notes, and snippets.

View email2liyang's full-sized avatar
🏠
Working from home

Ivan Li email2liyang

🏠
Working from home
View GitHub Profile
#!/bin/sh
# Prepare a yum-based host for building an RPM of HAProxy 1.5.3:
# update the system, install the build toolchain, fetch the source tarball,
# and stage it in the rpmbuild SOURCES directory.

# Bring all installed packages up to date before adding build tooling.
yum update -y
# wget is required for the download below; git is installed alongside it
# (git is not used in the lines shown here).
yum install -y wget git
# Fetch the HAProxy 1.5.3 source tarball into the current directory.
# NOTE(review): downloaded over plain HTTP with no checksum verification.
wget "http://www.haproxy.org/download/1.5/src/haproxy-1.5.3.tar.gz"
# Compiler toolchain and related build utilities.
yum groupinstall -y 'Development Tools'
# Development headers for OpenSSL.
yum install -y openssl-devel
# RPM packaging helpers (provides rpmdev-setuptree) and PCRE headers.
yum install -y rpmdevtools pcre-devel
# Set up the rpmbuild directory tree that the mv below targets.
rpmdev-setuptree
# Stage the tarball where rpmbuild looks for sources.
mv haproxy-1.5.3.tar.gz ~/rpmbuild/SOURCES/
@email2liyang
email2liyang / PopularityBreakdown.py
Last active November 6, 2017 14:34
MapReduce (mrjob) job to calculate each movie's popularity
from mrjob.job import MRJob
from mrjob.step import MRStep
class PopularityBreakdown(MRJob):
    """mrjob job that processes ratings in two chained steps."""

    def steps(self):
        """Return the ordered list of steps that make up this job.

        The first step pairs ``mapper_ratings`` with ``reducer_ratings``;
        the second step is reducer-only and runs ``reducer_sortings`` over
        the first step's output.

        Returns:
            A two-element list of ``MRStep`` objects, in execution order.
        """
        # Step 1: map/reduce pass over the ratings input.
        rating_step = MRStep(mapper=self.mapper_ratings,
                             reducer=self.reducer_ratings)
        # Step 2: reduce-only pass over the output of step 1.
        sorting_step = MRStep(reducer=self.reducer_sortings)
        return [rating_step, sorting_step]
@email2liyang
email2liyang / mostPopularFiveStartMovies.pig
Created November 19, 2017 09:56
Pig script to show the most popular five-star movies
-- Load the raw ratings file. No USING clause is given, so Pig's default
-- load function is used; each record is (userId, movieId, rating, ratingTime).
ratings = LOAD '/user/maria_dev/ml-100k/u.data' AS (userId:int,movieId:int,rating:int,ratingTime:int);
-- Load the pipe-delimited movie metadata. Only the first five fields are
-- declared in the schema.
metadata = LOAD '/user/maria_dev/ml-100k/u.item' USING PigStorage('|')
AS (movieId:int,movieTitle:chararray,releaseDate:chararray,videoRelease:chararray,imdbLink:chararray);
-- Keep the id and title, and convert the release date (parsed with the
-- pattern 'dd-MMM-yyyy') into a Unix timestamp named releaseTime.
nameLookup = FOREACH metadata GENERATE movieId,movieTitle,ToUnixTime(ToDate(releaseDate,'dd-MMM-yyyy')) AS releaseTime;
-- Group all ratings by movie so they can be aggregated per movieId.
ratingByMovie = Group ratings By movieId;
-- One row per movie: the group key (movieId) and the mean of its ratings.
avgRatings = FOREACH ratingByMovie GENERATE group as movieId,AVG(ratings.rating) AS avgRating;
@email2liyang
email2liyang / starbase
Created December 30, 2017 13:17
An HBase REST client snippet to insert and load data from HBase
from starbase import Connection
# Connect to the HBase REST endpoint on 127.0.0.1, port 8000.
c = Connection("127.0.0.1","8000")
# Handle to the "ratings" table (it may or may not exist yet).
ratings = c.table("ratings")
# Start from a clean slate: drop any existing table before recreating it.
if ratings.exists():
    print("Dropping existing table \n")
    ratings.drop()
# (Re)create the table with a single column family named "rating".
ratings.create("rating")
print("parsing files")
# NOTE(review): the handle is opened without a context manager; the code
# that consumes and closes it is outside the lines visible here.
ratingFile = open("/Users/ivan/Desktop/u.data","r")
@email2liyang
email2liyang / docker_test.py
Created March 19, 2019 08:21
Docker-based unit test with Python
import unittest
from datetime import datetime
import docker
class DocerBasedTest(unittest.TestCase):
@classmethod
def setUpClass(cls):
container_name = "test-mysql-" + datetime.now().strftime('%y%m%d%H%M%s')
client = docker.from_env()