ilyas ahsan (ilyasahsan123) · Jakarta, Indonesia
CREATE OR REPLACE VIEW `tulisanmedium.digitalviews.top_10_most_film_by_director`
OPTIONS(
  description="Top directors by film count, with up to five sample titles each. For digital content purposes; requested by the digital team."
)
AS
select
  count(distinct title) as number_of_films,
  director,
  string_agg(distinct title order by title limit 5) as films
from
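  -- hedged completion: the gist preview is truncated at the FROM clause;
  -- the source table name is a placeholder, and LIMIT 10 matches the view's name
  `tulisanmedium.digitalviews.SOURCE_TABLE`
group by director
order by number_of_films desc
limit 10
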
AWSTemplateFormatVersion: '2010-09-09'
Transform: 'AWS::Serverless-2016-10-31'
Description: An AWS Serverless Specification template describing your function.
Resources:
  dogreat:
    Type: 'AWS::Serverless::Function'
    Properties:
      Handler: lambda_function.lambda_handler
      Runtime: python3.6
      CodeUri: s3://do-great-bucket/source.zip
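A template like this can be deployed with the AWS CLI; a hedged sketch (the stack name is hypothetical, the code bucket comes from the snippet):

aws cloudformation deploy --template-file template.yaml --stack-name dogreat-stack --capabilities CAPABILITY_IAM
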
version: 0.2
env:
  variables:
    INPUT_TEMPLATE: "appspec.yaml"
    S3_BUCKET: "do-great-bucket"
phases:
  install:
    commands:
      - wget https://bootstrap.pypa.io/get-pip.py
      - python get-pip.py
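  # hedged continuation: the preview stops after the install phase; a build
  # phase might, for example, push the template named above into the bucket
  build:
    commands:
      - pip install awscli
      - aws s3 cp $INPUT_TEMPLATE s3://$S3_BUCKET/
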
PUT series/_mappings/movie
{
  "properties": {
    "film_to_franchise": {
      "type": "join",
      "eager_global_ordinals": true,
      "relations": {
        "franchise": "film"
      }
    }
  }
}
PUT series/movie/1?routing=1
{
  "id": "1",
  "film_to_franchise": {
    "name": "franchise"
  },
  "title": "Star Wars"
}
PUT series/movie/1196?routing=1
{
  "id": "1196",
  "film_to_franchise": {
    "name": "film",
    "parent": "1"
  },
  "title": "FILM_TITLE"
}
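With the parent and child documents indexed, films can be fetched by their franchise; a hedged example using a has_parent query (only the title field shown above is assumed):

POST series/movie/_search
{
  "query": {
    "has_parent": {
      "parent_type": "franchise",
      "query": {
        "match": { "title": "Star Wars" }
      }
    }
  }
}
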
curl -XPOST "http://localhost:8083/connectors" -H 'Content-Type: application/json' -d'
{
  "name": "debezium",
  "config": {
    "connector.class": "io.debezium.connector.mysql.MySqlConnector",
    "database.hostname": "localhost",
    "database.port": "3306",
    "database.user": "root",
    "database.password": "",
    "database.server.id": "184054",
    "database.server.name": "SERVER_NAME",
    "database.history.kafka.bootstrap.servers": "localhost:9092",
    "database.history.kafka.topic": "HISTORY_TOPIC"
  }
}'
import json

from kafka import KafkaConsumer
from google.cloud import bigquery

if __name__ == "__main__":
    # BigQuery configuration
    bigquery_client = bigquery.Client()
    dataset_ref = bigquery_client.dataset('DATASET_NAME')
    table_ref = dataset_ref.table('TABLE_NAME')
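    # --- hedged continuation: the gist preview stops above ---
    # the topic name and broker address are placeholders
    consumer = KafkaConsumer(
        'TOPIC_NAME',
        bootstrap_servers='localhost:9092',
        value_deserializer=lambda m: json.loads(m.decode('utf-8')))

    # fetch the table once so insert_rows() can resolve the schema
    table = bigquery_client.get_table(table_ref)

    for message in consumer:
        # stream each decoded record into BigQuery as a single-row insert
        errors = bigquery_client.insert_rows(table, [message.value])
        if errors:
            print(errors)
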
def detect_text(bucket, filename):
    # use the Cloud Vision client to extract text from an image stored on Cloud Storage
    from google.cloud import vision
    vision_client = vision.ImageAnnotatorClient()
    response = vision_client.text_detection({
        'source': {'image_uri': "gs://{}/{}".format(bucket, filename)}
    })
    # get all text from the extraction
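    # hedged completion: the preview stops above; text_annotations lists every
    # detected block, and the first entry aggregates the image's full text
    texts = response.text_annotations
    return texts[0].description if texts else ''
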
import json
import apache_beam as beam
import apache_beam.transforms.window as window
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import StandardOptions
from apache_beam.io.gcp.internal.clients import bigquery
if __name__ == "__main__":
    # create pipeline
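    # hedged continuation: the preview stops at the comment above; a typical
    # streaming pipeline given these imports reads Pub/Sub, windows the stream,
    # and writes to BigQuery (PROJECT, TOPIC, DATASET, TABLE are placeholders)
    options = PipelineOptions()
    options.view_as(StandardOptions).streaming = True
    p = beam.Pipeline(options=options)

    (p
     | 'read' >> beam.io.ReadFromPubSub(topic='projects/PROJECT/topics/TOPIC')
     | 'parse' >> beam.Map(json.loads)
     | 'window' >> beam.WindowInto(window.FixedWindows(60))
     | 'write' >> beam.io.WriteToBigQuery(
           'PROJECT:DATASET.TABLE',
           write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
           create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER))

    p.run().wait_until_finish()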