This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scrapy | |
import argh | |
from scrapy.crawler import CrawlerProcess | |
from scrapy.settings import Settings | |
USER_AGENTS = [ | |
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", | |
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)", | |
"Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", | |
"Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
if [ $# != 2 ]; then | |
echo "usage: $0 <directory> <hostname>" | |
exit 1 | |
fi | |
if [ ! -d $1 ];then | |
mkdir -p $1 | |
fi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
from fedora.client import AccountSystem | |
from getpass import getpass | |
import sys | |
if len(sys.argv) != 2: | |
print "Usage: %s <country code>" % (sys.argv[0]) | |
sys.exit(1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib | |
import json | |
import re | |
from dateutil.parser import parse as parse_date | |
from datetime import datetime | |
f = urllib.urlopen("http://apims.doe.gov.my/v2/").read() | |
stage1 = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime, timedelta | |
import urllib | |
import re | |
from lxml.html import fromstring | |
from cssselect import GenericTranslator, SelectorError | |
import os | |
import json | |
base_url = 'http://apims.doe.gov.my/v2/' | |
HOURS = { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# | |
# $Id: tranalyzer.py,v 1.5 1999/10/22 14:23:55 tsarna Exp tsarna $ | |
# | |
# Copyright (c) 1999 Tyler C. Sarna | |
# All rights reserved. | |
# | |
# Redistribution and use in source and binary forms, with or without | |
# modification, are permitted provided that the following conditions | |
# are met: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def csvRDD_to_rowRDD(rdd): | |
#expect a RDD that stores csv | |
# eg: rdd = sc.textFile('myfile.csv') | |
from pyspark.sql import Row | |
rdd = rdd.zipWithIndex() | |
fail_key = 'X_IMPORT_FAIL' | |
def extract_row(keys): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
import sys | |
from multiprocessing import Process, Pool, cpu_count | |
def transform(*args): | |
# -- do something here -- | |
return [] | |
def process_line(line): | |
line = line.strip().split('\t') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import date | |
from datetime import timedelta | |
import copy | |
# ported from npmjs epi-week package | |
# https://github.com/wombleton/epi-week | |
# | |
#getFirstWeek = (year) -> | |
# end = new Date(year, 0, 1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import facebook | |
import argh | |
import requests | |
from ConfigParser import ConfigParser | |
from pprint import pprint | |
import time | |
import json | |
import logging | |
import traceback | |
logging.basicConfig(level=logging.INFO) |
OlderNewer