This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import re | |
import dateutil.parser as parser | |
from dateutil import tz | |
from datetime import datetime | |
import csv | |
import s3fs | |
import pickle | |
from airflow.hooks.base_hook import BaseHook |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"steps": | |
[ | |
{ | |
"executor_memory": "18G", | |
"executor_cores": "4", | |
"description" : "Reading from dataset data and filtering", | |
"name": "step_0", | |
"guiid": "0", | |
"ActionOnFailure": "CANCEL_AND_WAIT", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"InstanceFleets": [ | |
{ | |
"Name": "fleetmaster", | |
"InstanceFleetType": "MASTER", | |
"TargetOnDemandCapacity": 1, | |
"InstanceTypeConfigs": [ | |
{"InstanceType":"m5.xlarge"} | |
] | |
}, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import re | |
import numpy as np | |
import nltk | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.metrics.pairwise import cosine_similarity | |
from sentence_transformers import SentenceTransformer | |
import BM25 | |
class MovieRecommender: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import datetime | |
from airflow import DAG | |
from airflow.models import Variable | |
from airflow.models.connection import Connection | |
from airflow.contrib.hooks.aws_hook import AwsHook | |
from airflow.providers.amazon.aws.hooks.s3 import S3Hook | |
from airflow.hooks.postgres_hook import PostgresHook | |
from airflow.operators.dummy_operator import DummyOperator | |
from airflow.operators.postgres_operator import PostgresOperator |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE OR REPLACE TABLE {project}.{dataset}.{table} | |
( | |
{columns} | |
) | |
; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
from pyspark.sql import SparkSession | |
from pyspark.sql.functions import * | |
import json | |
class SparkTask: | |
def __init__(self, params): | |
self.params = params |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
insert into {project}.{dataset}.{tabledestination} | |
(id, category, lastdate) | |
select id, category, lastdate | |
from {project}.{dataset}.{tablesource} | |
WHERE EXTRACT(YEAR FROM lastdate) = {year} and category = '{category}' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
insert into {project}.{dataset}.{tablefinal} | |
(id, category, lastdate) | |
select id, category, lastdate | |
from {project}.{dataset}.{table1} | |
union all | |
select id, category, lastdate | |
from {project}.{dataset}.{table2} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE OR REPLACE TABLE {project}.{dataset}.{table} | |
( | |
{columns} | |
) | |
PARTITION BY {partitioncolumn} | |
CLUSTER BY {clustercolumn} | |
; |
OlderNewer