Skip to content

Instantly share code, notes, and snippets.

View qi-qi's full-sized avatar

Qi Qi qi-qi

View GitHub Profile
#!/bin/bash
set -ex
[ -e /home/ec2-user/glue_ready ] && exit 0
mkdir -p /home/ec2-user/glue
cd /home/ec2-user/glue
# Write dev endpoint in a file which will be used by daemon scripts
glue_endpoint_file="/home/ec2-user/glue/glue_endpoint.txt"
apiVersion: v1
kind: Namespace
metadata:
name: airflow
---
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
name: efs-sc
provisioner: efs.csi.aws.com
FROM python:3.7.6-slim
# Never prompt the user for choices on installation/configuration of packages
ENV DEBIAN_FRONTEND noninteractive
ENV TERM linux
# Airflow
ARG AIRFLOW_USER_HOME=/airflow
ARG AIRFLOW_USER="airflow"
ARG AIRFLOW_VERSION="1.10.7"
@qi-qi
qi-qi / misc.sh
Last active January 29, 2020 18:04
# Archive the current directory into result.tar.gz, then send the archive
# to bashupload.com as the raw request body.
# "$(pwd)" replaces the unquoted backtick form so a working directory that
# contains spaces or glob characters reaches tar as a single argument.
tar czf result.tar.gz -C "$(pwd)" .
curl https://bashupload.com/result.tar.gz --data-binary @result.tar.gz
%%bash
# Copy /data into the current directory, archive the directory as
# ${NAME}.tar.gz, then send the archive to bashupload.com as the raw
# request body.
export NAME=2.7
cp -R /data .
# Every expansion is quoted so a NAME or working directory containing spaces
# or glob characters is not word-split into several arguments.
tar czf "${NAME}.tar.gz" -C "$(pwd)" .
curl "https://bashupload.com/${NAME}.tar.gz" --data-binary "@${NAME}.tar.gz"
git clone https://github.com/awslabs/amazon-kinesis-agent.git
sudo ./setup --install
...
Configuration file installed at: /etc/aws-kinesis/agent.json
Configuration details:
{
"cloudwatch.emitMetrics": true,
"kinesis.endpoint": "",
@qi-qi
qi-qi / kinesis-firehose-sample.py
Last active February 11, 2020 08:44
Sample code to put a record to Kinesis Firehose
# Each message in the payload should be single-line minified JSON with a newline '\n' appended at the end of each line:
# eg:
# {"id": 111, "name": "QiQi", "email": "test@test.com"}
# {"id": 222, "name": "Hello", "email": "hello@world.com"}
# {"id": 333, "name": "tv", "email": "tv@data.com"}
import boto3
import json
client = boto3.client('firehose', aws_access_key_id='aaa', aws_secret_access_key='bbb', region_name='eu-west-1')
val ranges = collect_set(struct($"from", $"to")).as("from_to")
df.groupBy($"id")
.agg(ranges)
.withColumn("bytes_sum_unique", Util.findUniqueBytesUDF($"from_to"))
// Use the BitSet from java.util.BitSet() due to performance
val findUniqueBytesUDF: UserDefinedFunction = udf { ranges: Seq[Row] =>
ranges
.map(x => (x.getAs[Int]("legit_from"), x.getAs[Int]("legit_to")))
.aggregate(new java.util.BitSet())((bitset, range) => {
## Colab Notebook: https://colab.research.google.com/drive/1bzT9XYTymi5E-x4C_-tmpla9_Ha2PgQd
import requests
from bs4 import BeautifulSoup
from datetime import datetime
keyword = 'ipad pro' #@param {type:"string"}
query = '+'.join(keyword.split(' '))
dt_fmt = '%Y-%m-%d %H:%M:%S'
import requests
import json
from collections import OrderedDict
d1 = {}
d2 = {}
personal_number = "yyyymmdd-xxxx"
for i in range(1000000, 1000400):
create table flights
(
year smallint,
month smallint,
day smallint,
carrier varchar(80) distkey,
origin char(3),
dest char(3),
aircraft_code char(3),
miles int,