Skip to content

Instantly share code, notes, and snippets.

@Chestermozhao
Chestermozhao / airflow_default_args.py
Created November 9, 2019 10:54
airflow default_args
default_args = {
"owner": "airflow",
# When this cron job starts
"start_date": airflow.utils.dates.days_ago(2),
# When the schedule ends
#"end_date": datetime(2019, 12, 30),
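# If True, a run is held back when the previous scheduled run of the task failed;
# False (used here) lets today's run go ahead even if yesterday's failed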
"depends_on_past": False,
# Email address(es) to notify on failure
"email": ["xxxx@gmail.com"],
@Chestermozhao
Chestermozhao / airflow_dags_and_tasks.py
Last active November 9, 2019 12:16
airflow dags and tasks
def fn_superman():
    """Print the steps of a simulated check for new comic chapters.

    Three progress messages are always printed. After that, two extra
    "unexpected interruption" messages are printed only when
    ``time.time() % 2`` is greater than 1 at the moment of the call.

    Returns:
        None. The function only writes to standard output.
    """
    print("取得使用者的閱讀紀錄")
    print("去漫畫網站看有沒有新的章節")
    print("跟紀錄比較,有沒有新連載?")

    # Murphy's Law: the wall clock acts as a cheap coin flip, so the
    # interruption branch fires on some calls and not on others.
    accident_occur = time.time() % 2 > 1
    if accident_occur:
        print("\n天有不測風雲,人有旦夕禍福")
        print("工作遇到預期外狀況被中斷\n")
@Chestermozhao
Chestermozhao / airflow_import_modules.py
Last active November 9, 2019 12:17
airflow import modules
import time
from datetime import datetime, timedelta
import airflow
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator
@Chestermozhao
Chestermozhao / items.py
Last active November 23, 2019 14:12
splash_scrapy crawler
# -*- coding: utf-8 -*-
import scrapy
class SplashTestItem(scrapy.Item):
hometeam = scrapy.Field()
@Chestermozhao
Chestermozhao / splash_screenshot.py
Last active November 23, 2019 14:27
scrapy_splash screenshot
# -*- coding: utf-8 -*-
import json
import base64
from scrapy.spiders import Spider
from scrapy_splash import SplashRequest
from items import SplashTestItem
class SplashSpider(Spider):
name = "splash_screen_shot"
@Chestermozhao
Chestermozhao / logger.py
Last active November 23, 2019 14:35
scrapy_splash har log
# -*- coding: utf-8 -*-
import logging
import sys
# Name under which the scrapy-splash log records are emitted.
logger_name = 'scrapy.splash.log'

# Look up (or create) the logger registered under that name and set its
# threshold to DEBUG.
log = logging.getLogger(name=logger_name)
log.setLevel(level=logging.DEBUG)
@Chestermozhao
Chestermozhao / es_insert_by_bulk.py
Created December 8, 2019 15:49
elasticsearch insert many by bulk api
# -*- coding: utf-8 -*-
import json
from elasticsearch import Elasticsearch
from elasticsearch import helpers
from const import (
SAMPLE_DATA_DIR,
ES_HOST,
ES_PORT
)
@Chestermozhao
Chestermozhao / const.py
Created December 8, 2019 15:50
elasticsearch_configs
# -*- coding: utf-8 -*-
# Path of the sample-data file.
# NOTE(review): despite the _DIR suffix this holds a file name, not a directory.
SAMPLE_DATA_DIR = "demo_data.json"
# Elasticsearch connection settings.
# ES_HOST includes the URL scheme; ES_PORT is kept separately as an integer.
ES_HOST = "http://localhost"
ES_PORT = 9200
@Chestermozhao
Chestermozhao / demo_data.json
Created December 8, 2019 15:51
elasticsearch demo data json
[
{
"_index": "test-index",
"_type": "authors",
"_id": 1,
"_source": {
"author": "Chestermo",
"gender": "male",
"age": 24,
"body_fat": "15%",
@Chestermozhao
Chestermozhao / es_insert_one.py
Last active December 8, 2019 17:32
elasticsearch insert one data
# -*- coding: utf-8 -*-
from elasticsearch import Elasticsearch
# Client for the Elasticsearch node on localhost, with host and port
# given explicitly.
es = Elasticsearch(host="localhost", port=9200)
data = {
"author": "Chestermo",
"gender": "male",
"age": "24",