Rodrigo Cunha eng-rodrigocunha

## mergePdf.gs
// https://tanaikech.github.io/2023/01/10/merging-multiple-pdf-files-as-a-single-pdf-file-using-google-apps-script/
async function mergePDF() {
  // Informe o ID da planilha onde estão os links
  var planilhaId = "1Iq6DNGk8XkfOX3kp2HDmDb1Ac7vK76brkJc6T6oydNc";

  // Informe o nome da planilha que contém os links
  var nomePlanilha = "LINKS";

  // Obter a planilha
  var planilha = SpreadsheetApp.openById(planilhaId);

## download_gcs.py
import basedosdados as bd
import pandas as pd
import glob

bd.config.project_config_path = "D:\\basedosdados\\staging"

# Download the "staging"-mode partitions for hours 14 through 23 of
# 2023-03-08 into the current directory. The Storage handle depends only on
# the dataset/table ids, so it is built once rather than once per hour.
st = bd.Storage(dataset_id="br_rj_riodejaneiro_onibus_gps", table_id="registros")
for hour in range(14, 24):
    print(hour)  # progress indicator: the hour partition about to be fetched
    st.download(savepath=".", partitions=f"data=2023-03-08/hora={hour}", mode="staging")

## get_vaccination_status.gs
/*
 * Convert PDF file to text
 * @param {string} fileId - The Google Drive ID of the PDF
 * @param {string} language - The language of the PDF text to use for OCR
 * @return {string} - The extracted text of the PDF file
 * https://www.labnol.org/extract-text-from-pdf-220422
 * IMPORTANT! https://www.labnol.org/shared-drives-google-script-220128
 */

const convertPDFToText = (fileId, language) => {

## get_vaccination_status.py
#!pip install pdfminer.six
import io
from pdfminer.high_level import extract_text

doses = ["Reforço", "Dose Adicional", "2/2", "1/2"]

# Read the vaccination-card PDF in binary mode and pull out its plain text.
pdf_path = r'E:\DOCUMENTOS PESSOAIS\Carteira Nacional de Vacinação Digital_4_DOSE.pdf'
with open(pdf_path, mode='rb') as f:
    # extract_text receives the already-open binary file object
    text = extract_text(f)

## mail_web_scrapping.py
#!pip install requests
#!pip install beautifulsoup4

# https://stackoverflow.com/questions/63533115/extract-valid-email-address-using-regular-expression-and-beautifulsoup

import requests
import re
from bs4 import BeautifulSoup

# Matches one e-mail-like token (local@domain.tld) and captures it in group 1.
# The group deliberately carries no trailing `{0,}` quantifier: making the
# group optional lets the pattern match the empty string at every position,
# so `findall` yields mostly '' entries and `search`/`match` never fail.
email = re.compile(r'([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)')

## dbt_to_dbdiagram.rb
#!/usr/bin/env ruby
# Generate a dbdiagram for dbdiagram.io from a dbt project.
#
# Usage:
#   1. Write your model schema.yml (there's another code in this gist to make it automatically)
#   2. Run `dbt docs generate` first.
#   3. Run `dbt_to_dbdiagram.rb`
#   4. Paste the output in https://dbdiagram.io/

require 'yaml'

## bigquery_schema_generator.sql
WITH
  columns AS (
  SELECT
    "      " || "- name: " || column_name || "\n" ||
    "      " || '  description: "' || column_name || '"' AS column_statement,
    table_name
  FROM
    `rj-smtr.veiculo`.INFORMATION_SCHEMA.COLUMNS ),
  tables AS (
  SELECT

## subsidio_sumario_q_consorcio.sql
# Sumário por quinzena e consórcio
WITH
  sumario AS (
  SELECT
    EXTRACT(YEAR
    FROM
      DATA) AS ano,
    EXTRACT(MONTH
    FROM
      DATA) AS mes,

## pdf_reduct.py
#!pip install pdf-redactor

import re
from datetime import datetime

import pdf_redactor

## Set options.

options = pdf_redactor.RedactorOptions()
	// https://tanaikech.github.io/2023/01/10/merging-multiple-pdf-files-as-a-single-pdf-file-using-google-apps-script/
	async function mergePDF() {
	// Informe o ID da planilha onde estão os links
	var planilhaId = "1Iq6DNGk8XkfOX3kp2HDmDb1Ac7vK76brkJc6T6oydNc";

	// Informe o nome da planilha que contém os links
	var nomePlanilha = "LINKS";

	// Obter a planilha
	var planilha = SpreadsheetApp.openById(planilhaId);
	import basedosdados as bd
	import pandas as pd
	import glob

	bd.config.project_config_path = "D:\\basedosdados\\staging"

	for hour in range(14, 24, 1):
	print(hour)
	st = bd.Storage(dataset_id="br_rj_riodejaneiro_onibus_gps", table_id="registros")
	st.download(savepath=".", partitions=f"data=2023-03-08/hora={hour}", mode="staging")
	/*
	* Convert PDF file to text
	* @param {string} fileId - The Google Drive ID of the PDF
	* @param {string} language - The language of the PDF text to use for OCR
	* return {string} - The extracted text of the PDF file
	* https://www.labnol.org/extract-text-from-pdf-220422
	* IMPORTANT! https://www.labnol.org/shared-drives-google-script-220128
	*/

	const convertPDFToText = (fileId, language) => {
	#!pip install pdfminer.six
	import io
	from pdfminer.high_level import extract_text

	doses = ["Reforço", "Dose Adicional", "2/2", "1/2"]

	# abrir o arquivo PDF
	with open(r'E:\DOCUMENTOS PESSOAIS\Carteira Nacional de Vacinação Digital_4_DOSE.pdf', 'rb') as f:
	# extrair o texto do PDF
	text = extract_text(f)
	#!pip install requests
	#!pip install beautifulsoup4

	# https://stackoverflow.com/questions/63533115/extract-valid-email-address-using-regular-expression-and-beautifulsoup

	import requests
	import re
	from bs4 import BeautifulSoup

	email = re.compile(r'([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+){0,}')
	#!/usr/bin/env ruby
	# Generate a dbdiagram for dbdiagram.io from a dbt project.
	#
	# Usage:
	# 1. Write your model schema.yml (there's another code in this gist to make it automatically)
	# 2. Run `dbt docs generate` first.
	# 3. Run `dbt_to_dbdiagram.rb`
	# 4. Paste the output in https://dbdiagram.io/

	require 'yaml'
	WITH
	columns AS (
	SELECT
	" " \|\| "- name: " \|\| column_name \|\| "\n" \|\|
	" " \|\| ' description: "' \|\| column_name \|\| '"' AS column_statement,
	table_name
	FROM
	`rj-smtr.veiculo`.INFORMATION_SCHEMA.COLUMNS ),
	tables AS (
	SELECT
	# Sumário por quinzena e consórcio
	WITH
	sumario AS (
	SELECT
	EXTRACT(YEAR
	FROM
	DATA) AS ano,
	EXTRACT(MONTH
	FROM
	DATA) AS mes,
	#!pip install pdf-redactor

	import re
	from datetime import datetime

	import pdf_redactor

	## Set options.

	options = pdf_redactor.RedactorOptions()