This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# DROP TABLES | |
users_table_drop = "DROP TABLE IF EXISTS users" | |
departments_table_drop = "DROP TABLE IF EXISTS departments" | |
companies_table_drop = "DROP TABLE IF EXISTS companies" | |
staging_table_drop = "DROP TABLE IF EXISTS staging" | |
# CREATE TABLES | |
staging_table_create = (""" | |
CREATE TABLE IF NOT EXISTS staging( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
version: '3.1' | |
services: | |
db: | |
container_name: pg_container | |
image: postgres | |
restart: always | |
environment: | |
POSTGRES_USER: "postgres" | |
POSTGRES_PASSWORD: "pg12345" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
insert into {project}.{dataset}.{tablefinal} | |
(id, category, lastdate) | |
select id, category, lastdate | |
from {project}.{dataset}.{table1} | |
union all | |
select id, category, lastdate | |
from {project}.{dataset}.{table2} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
insert into {project}.{dataset}.{tabledestination} | |
(id, category, lastdate) | |
select id, category, lastdate | |
from {project}.{dataset}.{tablesource} | |
WHERE EXTRACT(YEAR FROM lastdate) = {year} and category = '{category}' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
from pyspark.sql import SparkSession | |
from pyspark.sql.functions import * | |
import json | |
class SparkTask: | |
def __init__(self, params): | |
self.params = params |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE OR REPLACE TABLE {project}.{dataset}.{table} | |
( | |
{columns} | |
) | |
; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE OR REPLACE TABLE {project}.{dataset}.{table} | |
( | |
{columns} | |
) | |
PARTITION BY {partitioncolumn} | |
CLUSTER BY {clustercolumn} | |
; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from google.oauth2 import service_account | |
from google.cloud import dataproc_v1 as dataproc | |
from google.cloud import storage | |
import os | |
class dataproc_create_cluster: | |
def __init__(self): | |
self.__credentials = None |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"tasks" : [ | |
{ | |
"task_id" : "startup_dataproc_1", | |
"script" : "gcs.project-pydag.iac_scripts.iac.dataproc_create_cluster", | |
"params" : "{'cluster_name':'cluster-dataproc-pydag-2022', 'project_name':'atomic-key-348214', 'region':'us-central1', '**GCP_service-account':''}", | |
"dependencies":[] | |
}, | |
{ | |
"task_id" : "initial_ingestion_1", | |
"script" : "gcs.project-pydag.module_name.spark.csv_gcs_to_bq", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import re | |
import numpy as np | |
import nltk | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.metrics.pairwise import cosine_similarity | |
from sentence_transformers import SentenceTransformer | |
import BM25 | |
class MovieRecommender: |