Skip to content

Instantly share code, notes, and snippets.

View kakarukeys's full-sized avatar

Wong Jiang Fung kakarukeys

View GitHub Profile
@kakarukeys
kakarukeys / robust_parallel_bulk.py
Created July 7, 2022 03:21
robust_parallel_bulk.py
import logging
from enum import IntEnum
from datetime import datetime
from typing import List, Iterable, Dict, Any, Tuple, Literal, NamedTuple, Type
import tenacity as tn
from elasticsearch.helpers import parallel_bulk
from app.elasticsearch.client import es_client
from app.db.models import BaseModel
@kakarukeys
kakarukeys / gist:5cdb111c1ed9cb26c423434abf59ee75
Last active March 5, 2022 15:39
Frontend Engineer Coding Challenge
## Exercise
You are tasked with creating a UI that allows users to filter and view a company dataset, with the following requirements.
1. The app is a React app based on the structure of https://github.com/react-boilerplate/react-boilerplate.
2. The app connects to an HTTP API:
https://faker-companies.dk-dev.leadbook.com/api/v1/industries/
https://faker-companies.dk-dev.leadbook.com/api/v1/companies/
https://faker-companies.dk-dev.leadbook.com/api/v1/companies/?company_location=BA
@kakarukeys
kakarukeys / stream_from_gz.py
Last active October 21, 2021 05:56
stream from gz
import io
import time
from gzip import GzipFile
import pandas as pd
# https://stackoverflow.com/a/20260030/496852
def iterable_to_stream(iterable, buffer_size=io.DEFAULT_BUFFER_SIZE):
"""
@kakarukeys
kakarukeys / gist:51551cd1ad38bb77b0a849d929b7844c
Created September 9, 2021 01:56
summary email harvesting
import re
from pymongo import MongoClient
from settings import DEST_DATABASE_URL
if __name__ == '__main__':
with open("CREDENTIALS") as f:
credentials = f.read().strip()
@kakarukeys
kakarukeys / crawler.py
Last active May 12, 2021 06:59
automate login again (tutorial 10)
import time
from PIL import Image
import requests
from bs4 import BeautifulSoup
HEADERS = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:88.0) Gecko/20100101 Firefox/88.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
@kakarukeys
kakarukeys / crawler.py
Last active May 12, 2021 06:59
automate login (tutorial 2)
import os
import time
from PIL import Image
import requests
from bs4 import BeautifulSoup
# requires brotlipy, as the server uses brotli compression
username = os.environ["SSM_USERNAME"]
@kakarukeys
kakarukeys / crawler.py
Last active May 12, 2021 06:59
requests crawler (tutorial 1)
import time
import zlib
from io import BytesIO
from zipfile import ZipFile
import requests
from bs4 import BeautifulSoup
HEADERS = {
@kakarukeys
kakarukeys / test_mp.py
Last active April 30, 2021 06:52
python multiprocessing, clean up child processes upon shutdown
import time
import random
from signal import signal, SIGINT, SIGTERM
from multiprocessing import Process, Event
"""
There are two examples:
(1) with graceful shutdown
(2) without graceful shutdown
@kakarukeys
kakarukeys / test_mp.py
Created April 29, 2021 04:27
Testing multi-processing processes
import time
import random
from multiprocessing import Process
def f():
while True:
time.sleep(10)
dice = random.randint(1, 10)
@kakarukeys
kakarukeys / borrow_cookies.py
Created April 22, 2021 03:41
borrow_cookies.py
# -*- coding: utf-8 -*-
import json
import logging
import os.path
import subprocess
import time
from http.cookiejar import LWPCookieJar
from urllib.parse import urlparse
from selenium import webdriver