Skip to content

Instantly share code, notes, and snippets.

@nikparmar
Created June 6, 2017 08:04
Show Gist options
  • Save nikparmar/1d88ca4d01cd41517c014fc33f70105f to your computer and use it in GitHub Desktop.
Save nikparmar/1d88ca4d01cd41517c014fc33f70105f to your computer and use it in GitHub Desktop.
from google.cloud import bigquery
# from google.cloud.storage import Blob, Client as storage_client
from google.cloud import storage
import uuid
import time
class BigQueryToGCS(object):
    """Export BigQuery tables to Google Cloud Storage and inspect a bucket.

    NOTE(review): ``extract_table_to_storage`` / ``job.begin()`` look like the
    legacy (pre-0.28) ``google-cloud-bigquery`` job API -- confirm against the
    installed client version before upgrading the dependency.
    """

    def __init__(self, load_config_from_json=True):
        """
        :param load_config_from_json: Flag stored on the instance as
            ``config_from_file``. No method currently reads it (the only
            use is commented out in ``export_data_to_gcs``).
        """
        self.config_from_file = load_config_from_json

    def wait_for_job(self, job, poll_interval=1, timeout=None):
        """
        Block until ``job`` reaches the ``'DONE'`` state.

        :param job: Object exposing ``reload()``, ``state``, ``error_result``
            and ``errors`` (e.g. a BigQuery extract job).
        :param poll_interval: Seconds to sleep between two ``reload()`` calls.
        :param timeout: Maximum number of seconds to wait, or ``None`` (the
            default) to wait indefinitely.
        :return: ``None`` once the job is done without an error result.
        :raises RuntimeError: If the finished job carries an error result
            (raised with ``job.errors`` as argument), or if ``timeout``
            elapses before the job is done.
        """
        # time.time() rather than time.monotonic() so this also runs on
        # Python 2, which the rest of this script was written for.
        deadline = None if timeout is None else time.time() + timeout
        while True:
            job.reload()
            if job.state == 'DONE':
                if job.error_result:
                    raise RuntimeError(job.errors)
                return
            if deadline is not None and time.time() >= deadline:
                raise RuntimeError(
                    'Timed out after {0} seconds waiting for job'.format(
                        timeout))
            time.sleep(poll_interval)

    def export_data_to_gcs(self, dataset_name, table_name, bucket_name):
        """
        Export one ``ga_sessions_*`` table to GCS as newline-delimited JSON.

        :param dataset_name: Name of the BigQuery dataset; per the original
            author this is the account number (e.g. 54190935 for ACM).
        :param table_name: Only the date suffix (``YYYYMMDD``) of the table.
            The ``ga_sessions_`` prefix is added here, so the table that is
            exported is ``ga_sessions_YYYYMMDD``.
        :param bucket_name: Bucket the data is exported to. It is inserted
            verbatim into ``gs://<bucket_name>/<table>.json``, so it may
            also carry an object prefix (``bucket/some/folder``).
        :return: None
        :raises RuntimeError: If the extract job finishes with an error.
        """
        table_name = 'ga_sessions_{0}'.format(table_name)
        destination = 'gs://{0}/{1}.json'.format(bucket_name, table_name)
        # if self.config_from_file is True:
        bigquery_client = bigquery.Client()
        dataset = bigquery_client.dataset(dataset_name)
        table = dataset.table(table_name)
        # A random UUID is used as the job name for each export.
        job_name = str(uuid.uuid4())
        job = bigquery_client.extract_table_to_storage(
            job_name, table, destination)
        job.destination_format = "NEWLINE_DELIMITED_JSON"
        job.begin()
        self.wait_for_job(job)
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('Exported {}:{} to {}'.format(
            dataset_name, table_name, destination))

    def download_to_local_from_gcs(
            self, bucket_name='astro-big-data-project.appspot.com'):
        """
        Print the storage client's scopes and the ACL of ``bucket_name``.

        Despite its name this method does not download anything yet; the
        intended download is only sketched in the TODO below.

        :param bucket_name: Bucket to look up. Defaults to the bucket that
            was previously hard-coded here.
        :return: None
        """
        client = storage.Client()
        print(client.SCOPE)
        bucket = client.get_bucket(bucket_name)
        print(bucket.acl)
        # TODO: the actual download was left unfinished by the author:
        # blob = Blob('bqoutput/ga_sessions_20170604.json', bucket=bucket)
        # with open('my-secure-file.json', 'wb') as file_obj:
        #     blob.download_to_file(file_obj)
# Driver: build the exporter and run the bucket inspection step.
_obj = BigQueryToGCS(load_config_from_json=True)
# Example export call (disabled): dataset 54190935, table date 20170604,
# written under the 'bqoutput' prefix of the project bucket.
# _obj.export_data_to_gcs(54190935, 20170604,
#                         'astro-big-data-project.appspot.com/bqoutput')
_obj.download_to_local_from_gcs()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment