Skip to content

Instantly share code, notes, and snippets.

View TiGaI's full-sized avatar
🏠
Working from home

Junjie TiGaI

🏠
Working from home
View GitHub Profile
def init_callbacks(app, countFreq):
# Registers the `update_graph` callback on the given `app` via `@app.callback`.
# NOTE(review): this snippet has lost its indentation and stops right after
# `buf` is created; whatever `update_graph` does next (including what it
# returns for the two declared outputs) is not visible here.
@app.callback(
# Two outputs: the 'figure' property of 'frequency_word_bargraph' and the
# 'src' property of 'matplotlib-graph'.
[dash.dependencies.Output('frequency_word_bargraph', 'figure'),
dash.dependencies.Output('matplotlib-graph', 'src')],
# One input: the 'value' property of 'range_frequency_number'.
[dash.dependencies.Input('range_frequency_number', 'value')])
def update_graph(value):
# Slice `countFreq` using value[0] as start and value[1] as stop.
# Presumably `value` is the [low, high] pair of a range control — TODO confirm.
newGraph = countFreq[value[0]:value[1]]
# Build a 500x500 word cloud on a white background from the sliced frequencies.
wordcloud = WordCloud(height=500, width=500, background_color="white", contour_color='white', colormap="magma").generate_from_frequencies(newGraph)
buf = io.BytesIO() # in-memory files
html_layout = '''
<!DOCTYPE html>
<html>
<head>
{%metas%}
<title>{%title%}</title>
<link type="text/css" rel="stylesheet" href="/static/css/font-awesome-4.1.0.min.css" />
<link type="text/css" rel="stylesheet" href="/static/css/bootstrap-3.1.1.min.css">
<link type="text/css" rel="stylesheet" href="/static/css/bootstrap-theme-3.1.1.min.css" />
<link type="text/css" rel="stylesheet" href="/static/css/layout.main.css" />
import plotly
import plotly.graph_objs as go
import pandas as pd
import numpy as np
import json
import nltk
# for NLP purposes
# VERSION 1.10.9
# AUTHOR: Matthieu "Puckel_" Roisil
# DESCRIPTION: Basic Airflow container
# BUILD: docker build --rm -t puckel/docker-airflow .
# SOURCE: https://github.com/puckel/docker-airflow
FROM python:3.6-slim-buster
LABEL maintainer="Puckel_"
# Never prompt the user for choices on installation/configuration of packages
from airflow import DAG
from airflow.operators.python_operator import PythonOperator, BranchPythonOperator
from airflow.operators.dummy_operator import DummyOperator
import logging
import datetime
import twint
#directly import
# from sensors.gcs_bq_custom_sensor import GoogleCloudStorageBigQueryUpdateSensor
class CheckBQDuplication(BaseOperator):
"""
Check if a specific table in BigQuery contains duplicated data after the load
"""
@apply_defaults
def __init__(
self,
dataset_name,
bigquery_table_name,
bigquery_table_key,
def checkingYesterdayTweet(bucket_name, project, credentials_path, **kwargs):
# Looks up a Google Cloud Storage bucket and builds the name used for
# yesterday's tweet file.
# NOTE(review): this snippet has lost its indentation and ends after
# `filename` is built; how `bucket` and `filename` are used afterwards is not
# visible here.
# Load service-account credentials from the file when a path is given;
# otherwise pass None to the storage client.
credentials = service_account.Credentials.from_service_account_file(credentials_path) if credentials_path else None
storage_client = storage.Client(project=project, credentials=credentials)
# NOTE(review): this unconditionally overwrites the `bucket_name` argument, so
# the caller's value is ignored — confirm whether that is intended.
bucket_name = "airflowexample"
bucket = storage_client.get_bucket(bucket_name)
# "Yesterday" is the current local date-time minus one day (naive datetime,
# no timezone attached).
yesterday = datetime.datetime.today() - datetime.timedelta(days=1)
searchTerm = "coronavirus"
# Produces names of the form "tweet-coronavirus-YYYY-MM-DD".
filename = f"tweet-{searchTerm}-{yesterday.strftime('%Y-%m-%d')}"
from airflow import DAG
from google.cloud import storage
from google.oauth2 import service_account
from airflow.operators.python_operator import PythonOperator
from airflow.operators.dummy_operator import DummyOperator
from io import BytesIO, StringIO
import pandas as pd
import numpy as np
from airflow.operators.sensors import BaseSensorOperator
from airflow.utils.decorators import apply_defaults
from google.cloud import bigquery
from google.cloud import storage
from google.oauth2 import service_account
import datetime
class GoogleCloudStorageBigQueryUpdateSensor(BaseSensorOperator):
"""
Check whether the modified date of the BigQuery dataset is earlier than the modified date of the GCS files.
from airflow import DAG
from google.cloud import storage
from google.oauth2 import service_account
from airflow.operators.python_operator import PythonOperator
from io import BytesIO, StringIO
import pandas as pd
import numpy as np
from datetime import datetime
import logging