Skip to content

Instantly share code, notes, and snippets.

@reizist
Created May 20, 2022 10:41
Show Gist options
  • Save reizist/74a31709ff8104906cebc243c965996c to your computer and use it in GitHub Desktop.
Save reizist/74a31709ff8104906cebc243c965996c to your computer and use it in GitHub Desktop.
require "logger"
require 'pry-byebug'
require "google/cloud/dlp/v2"
include Google::Cloud::Dlp::V2
# Value passed as `rows_limit` for every BigQuery table that gets scanned.
ROW_SCAN_LIMIT = 10_000
# Doc: https://googleapis.dev/ruby/google-cloud-dlp-v2/latest/Google/Cloud/Dlp/V2.html
client = DlpService::Client.new
# Tables to inspect, nested as project => dataset => [table, ...].
# Keys are quoted symbols, so names containing characters such as hyphens
# remain valid literals. Replace the placeholders with real names.
target_tables = {
  'pj_name': {
    'dataset_name': [
      'table_name'
    ]
  }
}
# Builds the InspectConfig shared by every inspect job.
#
# Detects the EMAIL_ADDRESS, PERSON_NAME and PHONE_NUMBER info types with a
# minimum likelihood of 'POSSIBLE' and `include_quote` enabled.
# Options left at their defaults: content_options, custom_info_types,
# exclude_info_types, limits, rule_set.
#
# @return [InspectConfig]
def inspect_config
  # see: https://cloud.google.com/dlp/docs/infotypes-reference
  detectors = %w[EMAIL_ADDRESS PERSON_NAME PHONE_NUMBER].map do |type_name|
    InfoType.new(name: type_name)
  end

  InspectConfig.new(info_types: detectors, min_likelihood: 'POSSIBLE', include_quote: true)
end
# Builds the StorageConfig pointing DLP at a single BigQuery table.
#
# The scan is capped at ROW_SCAN_LIMIT rows and uses the RANDOM_START sample
# method. Options left at their defaults: excluded_fields, identifying_fields,
# rows_limit_percent.
#
# @param project_name [#to_s] project that owns the table
# @param dataset_name [#to_s] dataset that contains the table
# @param table_name [#to_s] table to scan
# @return [StorageConfig]
def storage_config(project_name, dataset_name, table_name)
  source_table = BigQueryTable.new(
    project_id: project_name,
    dataset_id: dataset_name,
    table_id: table_name
  )

  StorageConfig.new(
    big_query_options: BigQueryOptions.new(
      rows_limit: ROW_SCAN_LIMIT,
      table_reference: source_table,
      sample_method: BigQueryOptions::SampleMethod::RANDOM_START
    )
  )
end
# Builds the InspectJobConfig for one BigQuery table.
#
# The job combines the shared `inspect_config`, a `storage_config` for the
# given table, and a single SaveFindings action that writes results to the
# BigQuery table data-infra-secure.dlp.organon-summary.
# No inspect template is used (`inspect_template_name` is left unset).
#
# @param project_name [#to_s] project that owns the table to scan
# @param dataset_name [#to_s] dataset that contains the table to scan
# @param table_name [#to_s] table to scan
# @return [InspectJobConfig]
def inspect_job(project_name, dataset_name, table_name)
  findings_table = BigQueryTable.new(
    project_id: 'data-infra-secure',
    dataset_id: 'dlp',
    table_id: 'organon-summary'
  )
  save_findings = Action::SaveFindings.new(
    output_config: OutputStorageConfig.new(table: findings_table)
  )

  # Returned directly: the previous version assigned the result to a local
  # named `inspect_job`, which shadowed this method's name and was never read.
  InspectJobConfig.new(
    actions: [Action.new(save_findings: save_findings)],
    inspect_config: inspect_config,
    storage_config: storage_config(project_name, dataset_name, table_name)
  )
end
# Submit one DLP inspect job per configured table and print each response.
# Job ids are "<project>-<dataset>-<table>-<unix timestamp>".
target_tables.each do |project, datasets|
  datasets.each do |dataset, tables|
    tables.each do |table|
      job_label = "#{project}-#{dataset}-#{table}"
      puts "Start Inspect Job: #{job_label}"

      request = CreateDlpJobRequest.new(
        job_id: "#{job_label}-#{Time.now.to_i}",
        parent: 'projects/data-infra-secure/locations/asia-northeast1',
        inspect_job: inspect_job(project, dataset, table)
      )
      p client.create_dlp_job(request)
    end
  end
end
# Mixin that exposes one shared Logger through a `logger` method.
module MyLogger
  # Shared Logger instance: writes to $stderr with its level set to WARN.
  LOGGER = Logger.new($stderr, level: Logger::WARN)

  # @return [Logger] the shared MyLogger::LOGGER instance
  def logger
    MyLogger::LOGGER
  end
end
# Define a gRPC module-level logger method before grpc/logconfig.rb loads.
# Reopens the GRPC module (creating it if it is not defined yet) and extends it
# with MyLogger, which adds MyLogger#logger to GRPC's module-level methods.
# NOTE(review): this runs after `require "google/cloud/dlp/v2"` and after the
# job submission loop. If that require already loads grpc, this does not
# happen "before grpc/logconfig.rb loads" and cannot affect logging during the
# jobs — confirm, and consider moving it above the requires.
module GRPC
extend MyLogger
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment