Skip to content

Instantly share code, notes, and snippets.

@Chestermozhao
Chestermozhao / es_search_data.py
Created December 8, 2019 17:51
elasticsearch search data
# -*- coding: utf-8 -*-
from elasticsearch import Elasticsearch
from const import (
ES_HOST,
ES_PORT
)
# Build the shared client from the host/port configured in const.
# Do not rebind `es` with a bare `Elasticsearch()` afterwards: that replaces
# this client with one using the library's default connection settings and
# silently ignores ES_HOST / ES_PORT.
es = Elasticsearch(host=ES_HOST, port=ES_PORT)
@Chestermozhao
Chestermozhao / es_update_data.py
Last active December 8, 2019 18:28
elasticsearch update data
# -*- coding: utf-8 -*-
from elasticsearch import Elasticsearch
from elasticsearch import helpers
from const import (
ES_HOST,
ES_PORT
)
# Build the shared client from the host/port configured in const.
# Do not rebind `es` with a bare `Elasticsearch()` afterwards: that replaces
# this client with one using the library's default connection settings and
# silently ignores ES_HOST / ES_PORT.
es = Elasticsearch(host=ES_HOST, port=ES_PORT)
@Chestermozhao
Chestermozhao / es_delete_data.py
Created December 8, 2019 18:46
elasticsearch delete data
# -*- coding: utf-8 -*-
from elasticsearch import Elasticsearch
from elasticsearch import helpers
from const import (
ES_HOST,
ES_PORT
)
# Build the shared client from the host/port configured in const.
# Do not rebind `es` with a bare `Elasticsearch()` afterwards: that replaces
# this client with one using the library's default connection settings and
# silently ignores ES_HOST / ES_PORT.
es = Elasticsearch(host=ES_HOST, port=ES_PORT)
@Chestermozhao
Chestermozhao / es_prefix_and_full_text_search_data.py
Created December 8, 2019 19:21
elasticsearch prefix search and full-text search
# -*- coding: utf-8 -*-
from elasticsearch import Elasticsearch
from const import (
ES_HOST,
ES_PORT
)
# Build the shared client from the host/port configured in const.
# Do not rebind `es` with a bare `Elasticsearch()` afterwards: that replaces
# this client with one using the library's default connection settings and
# silently ignores ES_HOST / ES_PORT.
es = Elasticsearch(host=ES_HOST, port=ES_PORT)
@Chestermozhao
Chestermozhao / python_shell.sh
Created December 21, 2019 17:24
shell script
#!/bin/bash
# Export PYTHONPATH, then run subscribing_channels.py with the python3
# interpreter located under /Users/bill/venv.
# NOTE: <workplace> is a placeholder. Replace it with the real project
# directory path before running; left as-is, bash parses the unquoted `<`
# and `>` as redirection operators instead of as part of the value.
export PYTHONPATH=<workplace>
/Users/bill/venv/bin/python3 subscribing_channels.py
@Chestermozhao
Chestermozhao / airflow_single_task.py
Created December 21, 2019 18:07
airflow: single task
# Importing modules
from datetime import timedelta, datetime
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
# Setting default arguments
default_args = {
"owner": "Chester mo",
"start_date": datetime(2021, 1, 1, 0, 0),
@Chestermozhao
Chestermozhao / channels.json
Created December 21, 2019 18:41
airflow: add channel list
{
# channel_name: the YouTube channel's name
# channel_id: visit YouTube and copy the channel ID from the channel's URL,
# for example `https://www.youtube.com/channel/UC-QDfvrRIDB6F0bIO4I4HkQ`
# Here the channel name is Pretty Printed (you can choose any name; only channel_id is used for crawling)
# and the channel ID is UC-QDfvrRIDB6F0bIO4I4HkQ
# Please delete the comments above before running Airflow
"channel_name": "channel_id"
}
@Chestermozhao
Chestermozhao / update_mongo_from_lst.py
Created December 21, 2019 18:44
airflow: create the list of subscribed channels
import json
from .mongo import collection_sub_channel
def init_channel_info(name, _id):
init_data = {
"channel_name": name,
"channel_id": _id,
"previous_title": "",
"previous_link": "",
@Chestermozhao
Chestermozhao / mongo.py
Created December 21, 2019 18:46
airflow: mongo.py
import pymongo
# Module-level MongoDB handles.
# Client for the MongoDB server at localhost:27017.
client = pymongo.MongoClient("mongodb://localhost:27017/")
# The "youtube" database and its "sub_channel" collection.
db = client.get_database("youtube")
collection_sub_channel = db.get_collection("sub_channel")
@Chestermozhao
Chestermozhao / check_and_update_record.py
Created December 22, 2019 08:29
airflow: check_and_update_record
from .mongo import collection_sub_channel
def check_and_update_record(mode, **context):
if mode == "check":
sub_channels = collection_sub_channel.find()
sub_channels = list(sub_channels) if sub_channels else ""
return sub_channels
elif mode == "update":
print("Saving latest youtube information..")