Skip to content

Instantly share code, notes, and snippets.

View qi-qi's full-sized avatar

Qi Qi qi-qi

View GitHub Profile
# wget https://ip-ranges.amazonaws.com/ip-ranges.json
# jq '.prefixes[] | select(.region=="eu-west-1")' < ip-ranges.json
{
"ip_prefix": "13.248.118.0/24",
"region": "eu-west-1",
"service": "AMAZON"
}
{
"ip_prefix": "54.155.0.0/16",
#!/bin/bash
set -ex
[ -e /home/ec2-user/glue_ready ] && exit 0
mkdir -p /home/ec2-user/glue
cd /home/ec2-user/glue
# Write the dev endpoint to a file that will be used by the daemon scripts
glue_endpoint_file="/home/ec2-user/glue/glue_endpoint.txt"
@qi-qi
qi-qi / kinesis-firehose-sample.py
Last active February 11, 2020 08:44
Sample code to put a record to Kinesis Firehose
# Each message in the payload should be single-line, minified JSON with a newline '\n' appended at the end of each line:
# eg:
# {"id": 111, "name": "QiQi", "email": "test@test.com"}
# {"id": 222, "name": "Hello", "email": "hello@world.com"}
# {"id": 333, "name": "tv", "email": "tv@data.com"}
import boto3
import json
client = boto3.client('firehose', aws_access_key_id='aaa', aws_secret_access_key='bbb', region_name='eu-west-1')
@qi-qi
qi-qi / misc.sh
Last active January 29, 2020 18:04
tar czf result.tar.gz -C `pwd` .
curl https://bashupload.com/result.tar.gz --data-binary @result.tar.gz
%%bash
export NAME=2.7
cp -R /data .
tar czf ${NAME}.tar.gz -C `pwd` .
curl https://bashupload.com/${NAME}.tar.gz --data-binary @${NAME}.tar.gz
FROM python:3.7.6-slim
# Never prompt the user for choices on installation/configuration of packages
ENV DEBIAN_FRONTEND noninteractive
ENV TERM linux
# Airflow
ARG AIRFLOW_USER_HOME=/airflow
ARG AIRFLOW_USER="airflow"
ARG AIRFLOW_VERSION="1.10.7"
apiVersion: v1
kind: Namespace
metadata:
name: airflow
---
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
name: efs-sc
provisioner: efs.csi.aws.com
git clone https://github.com/awslabs/amazon-kinesis-agent.git
sudo ./setup --install
...
Configuration file installed at: /etc/aws-kinesis/agent.json
Configuration details:
{
"cloudwatch.emitMetrics": true,
"kinesis.endpoint": "",
val ranges = collect_set(struct($"from", $"to")).as("from_to")
df.groupBy($"id")
.agg(ranges)
.withColumn("bytes_sum_unique", Util.findUniqueBytesUDF($"from_to"))
// Use java.util.BitSet for performance
val findUniqueBytesUDF: UserDefinedFunction = udf { ranges: Seq[Row] =>
ranges
.map(x => (x.getAs[Int]("legit_from"), x.getAs[Int]("legit_to")))
.aggregate(new java.util.BitSet())((bitset, range) => {
## Colab Notebook: https://colab.research.google.com/drive/1bzT9XYTymi5E-x4C_-tmpla9_Ha2PgQd
import requests
from bs4 import BeautifulSoup
from datetime import datetime
keyword = 'ipad pro' #@param {type:"string"}
query = '+'.join(keyword.split(' '))
dt_fmt = '%Y-%m-%d %H:%M:%S'
import requests
import json
from collections import OrderedDict
d1 = {}
d2 = {}
personal_number = "yyyymmdd-xxxx"
for i in range(1000000, 1000400):