ilyas ahsan (ilyasahsan123) · Jakarta, Indonesia
CREATE OR REPLACE VIEW `tulisanmedium.digitalviews.top_10_most_film_by_director`
OPTIONS(
  description="Top directors by film count, with up to five sample titles each. For digital content purposes; requested by the digital team."
)
AS
select
  count(distinct title) as number_of_films,
  director,
  string_agg(distinct title order by title limit 5) as films
from
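  -- hedged completion: the gist preview is truncated at the FROM clause;
  -- the source table name is a placeholder, and LIMIT 10 matches the view's name
  `tulisanmedium.digitalviews.SOURCE_TABLE`
group by director
order by number_of_films desc
limit 10
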
AWSTemplateFormatVersion: '2010-09-09'
Transform: 'AWS::Serverless-2016-10-31'
Description: An AWS Serverless Specification template describing your function.
Resources:
  dogreat:
    Type: 'AWS::Serverless::Function'
    Properties:
      Handler: lambda_function.lambda_handler
      Runtime: python3.6
      CodeUri: s3://do-great-bucket/source.zip
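A template like this can be deployed with the AWS CLI; a hedged sketch (the stack name is hypothetical, the code bucket comes from the snippet):

aws cloudformation deploy --template-file template.yaml --stack-name dogreat-stack --capabilities CAPABILITY_IAM
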
version: 0.2
env:
  variables:
    INPUT_TEMPLATE: "appspec.yaml"
    S3_BUCKET: "do-great-bucket"
phases:
  install:
    commands:
      - wget https://bootstrap.pypa.io/get-pip.py
      - python get-pip.py
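  # hedged continuation: the preview stops after the install phase; a build
  # phase might, for example, push the template named above into the bucket
  build:
    commands:
      - pip install awscli
      - aws s3 cp $INPUT_TEMPLATE s3://$S3_BUCKET/
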
PUT series/_mappings/movie
{
  "properties": {
    "film_to_franchise": {
      "type": "join",
      "eager_global_ordinals": true,
      "relations": {
        "franchise": "film"
      }
    }
  }
}
PUT series/movie/1?routing=1
{
  "id": "1",
  "film_to_franchise": {
    "name": "franchise"
  },
  "title": "Star Wars"
}
PUT series/movie/1196?routing=1
{
  "id": "1196",
  "film_to_franchise": {
    "name": "film",
    "parent": "1"
  },
  "title": "FILM_TITLE"
}
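With the parent and child documents indexed, films can be fetched by their franchise; a hedged example using a has_parent query (only the title field shown above is assumed):

POST series/movie/_search
{
  "query": {
    "has_parent": {
      "parent_type": "franchise",
      "query": {
        "match": { "title": "Star Wars" }
      }
    }
  }
}
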
curl -XPOST "http://localhost:8083/connectors" -H 'Content-Type: application/json' -d'
{
  "name": "debezium",
  "config": {
    "connector.class": "io.debezium.connector.mysql.MySqlConnector",
    "database.hostname": "localhost",
    "database.port": "3306",
    "database.user": "root",
    "database.password": "",
    "database.server.id": "184054",
    "database.server.name": "SERVER_NAME",
    "database.history.kafka.bootstrap.servers": "localhost:9092",
    "database.history.kafka.topic": "HISTORY_TOPIC"
  }
}'
import json

from kafka import KafkaConsumer
from google.cloud import bigquery

if __name__ == "__main__":
    # BigQuery configuration
    bigquery_client = bigquery.Client()
    dataset_ref = bigquery_client.dataset('DATASET_NAME')
    table_ref = dataset_ref.table('TABLE_NAME')
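    # --- hedged continuation: the gist preview stops above ---
    # the topic name and broker address are placeholders
    consumer = KafkaConsumer(
        'TOPIC_NAME',
        bootstrap_servers='localhost:9092',
        value_deserializer=lambda m: json.loads(m.decode('utf-8')))

    # fetch the table once so insert_rows() can resolve the schema
    table = bigquery_client.get_table(table_ref)

    for message in consumer:
        # stream each decoded record into BigQuery as a single-row insert
        errors = bigquery_client.insert_rows(table, [message.value])
        if errors:
            print(errors)
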
def detect_text(bucket, filename):
    # use the Cloud Vision client to extract text from an image stored on Cloud Storage
    from google.cloud import vision
    vision_client = vision.ImageAnnotatorClient()
    response = vision_client.text_detection({
        'source': {'image_uri': "gs://{}/{}".format(bucket, filename)}
    })
    # get all text from the extraction
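    # hedged completion: the preview stops above; text_annotations lists every
    # detected block, and the first entry aggregates the image's full text
    texts = response.text_annotations
    return texts[0].description if texts else ''
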
import json
import apache_beam as beam
import apache_beam.transforms.window as window
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import StandardOptions
from apache_beam.io.gcp.internal.clients import bigquery
if __name__ == "__main__":
    # create pipeline
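    # hedged continuation: the preview stops at the comment above; a typical
    # streaming pipeline given these imports reads Pub/Sub, windows the stream,
    # and writes to BigQuery (PROJECT, TOPIC, DATASET, TABLE are placeholders)
    options = PipelineOptions()
    options.view_as(StandardOptions).streaming = True
    p = beam.Pipeline(options=options)

    (p
     | 'read' >> beam.io.ReadFromPubSub(topic='projects/PROJECT/topics/TOPIC')
     | 'parse' >> beam.Map(json.loads)
     | 'window' >> beam.WindowInto(window.FixedWindows(60))
     | 'write' >> beam.io.WriteToBigQuery(
           'PROJECT:DATASET.TABLE',
           write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
           create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER))

    p.run().wait_until_finish()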