@dalinaum
Last active April 29, 2021 17:23
answers_query = """
SELECT a.id, a.body, a.owner_user_id
FROM `bigquery-public-data.stackoverflow.posts_questions` AS q
INNER JOIN `bigquery-public-data.stackoverflow.posts_answers` AS a
ON q.id = a.parent_id
WHERE q.tags LIKE '%bigquery%'
"""
answers_query = """
SELECT a.id, a.body, a.owner_user_id
FROM `bigquery-public-data.stackoverflow.posts_questions` as q
INNER JOIN `bigquery-public-data.stackoverflow.posts_answers` as a
ON q.id = a.parent_id
WHERE q.tags LIKE '%bigquery%'
"""
---------------------------------------------------------------------------
BadRequest Traceback (most recent call last)
<ipython-input-41-bf2ceee9d135> in <module>
13
14 # API request - run the query, and return a pandas DataFrame
---> 15 answers_results = answers_query_job.to_dataframe()
16
17 # Preview results
/opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/job.py in to_dataframe(self, bqstorage_client, dtypes, progress_bar_type, create_bqstorage_client, date_as_object)
3403 ValueError: If the `pandas` library cannot be imported.
3404 """
-> 3405 return self.result().to_dataframe(
3406 bqstorage_client=bqstorage_client,
3407 dtypes=dtypes,
/opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/job.py in result(self, page_size, max_results, retry, timeout, start_index)
3232 """
3233 try:
-> 3234 super(QueryJob, self).result(retry=retry, timeout=timeout)
3235
3236 # Return an iterator instead of returning the job.
/opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/job.py in result(self, retry, timeout)
819 self._begin(retry=retry, timeout=timeout)
820 # TODO: modify PollingFuture so it can pass a retry argument to done().
--> 821 return super(_AsyncJob, self).result(timeout=timeout)
822
823 def cancelled(self):
/opt/conda/lib/python3.7/site-packages/google/api_core/future/polling.py in result(self, timeout)
123 the timeout is reached before the operation completes.
124 """
--> 125 self._blocking_poll(timeout=timeout)
126
127 if self._exception is not None:
/opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/job.py in _blocking_poll(self, timeout)
3128 self._done_timeout = timeout
3129 self._transport_timeout = timeout
-> 3130 super(QueryJob, self)._blocking_poll(timeout=timeout)
3131
3132 @staticmethod
/opt/conda/lib/python3.7/site-packages/google/api_core/future/polling.py in _blocking_poll(self, timeout)
102
103 try:
--> 104 retry_(self._done_or_raise)()
105 except exceptions.RetryError:
106 raise concurrent.futures.TimeoutError(
/opt/conda/lib/python3.7/site-packages/google/api_core/retry.py in retry_wrapped_func(*args, **kwargs)
284 sleep_generator,
285 self._deadline,
--> 286 on_error=on_error,
287 )
288
/opt/conda/lib/python3.7/site-packages/google/api_core/retry.py in retry_target(target, predicate, sleep_generator, deadline, on_error)
182 for sleep in sleep_generator:
183 try:
--> 184 return target()
185
186 # pylint: disable=broad-except
/opt/conda/lib/python3.7/site-packages/google/api_core/future/polling.py in _done_or_raise(self)
81 def _done_or_raise(self):
82 """Check if the future is done and raise if it's not."""
---> 83 if not self.done():
84 raise _OperationNotComplete()
85
/opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/job.py in done(self, retry, timeout)
3114 timeout_ms=timeout_ms,
3115 location=self.location,
-> 3116 timeout=transport_timeout,
3117 )
3118
/opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/client.py in _get_query_results(self, job_id, retry, project, timeout_ms, location, timeout)
1562 path=path,
1563 query_params=extra_params,
-> 1564 timeout=timeout,
1565 )
1566 return _QueryResults.from_api_repr(resource)
/opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/client.py in _call_api(self, retry, span_name, span_attributes, job_ref, **kwargs)
634 name=span_name, attributes=span_attributes, client=self, job_ref=job_ref
635 ):
--> 636 return call()
637 return call()
638
/opt/conda/lib/python3.7/site-packages/google/api_core/retry.py in retry_wrapped_func(*args, **kwargs)
284 sleep_generator,
285 self._deadline,
--> 286 on_error=on_error,
287 )
288
/opt/conda/lib/python3.7/site-packages/google/api_core/retry.py in retry_target(target, predicate, sleep_generator, deadline, on_error)
182 for sleep in sleep_generator:
183 try:
--> 184 return target()
185
186 # pylint: disable=broad-except
~/.local/lib/python3.7/site-packages/kaggle_gcp.py in api_request(self, *args, **kwargs)
98 """
99 try:
--> 100 return super().api_request(*args, **kwargs)
101 except Forbidden as e:
102 msg = ("Permission denied using Kaggle's public BigQuery integration. "
/opt/conda/lib/python3.7/site-packages/google/cloud/_http.py in api_request(self, method, path, query_params, data, content_type, headers, api_base_url, api_version, expect_json, _target_object, timeout)
481
482 if not 200 <= response.status_code < 300:
--> 483 raise exceptions.from_http_response(response)
484
485 if expect_json and response.content:
BadRequest: 400 GET https://dp.kaggle.net/bigquery/v2/projects/kaggle-161607/queries/77cf20c9-a1ab-40d2-9a0a-042493568bee?maxResults=0&location=US&prettyPrint=false: Query exceeded limit for bytes billed: 10000000000. 26932674560 or higher required.
(job ID: 77cf20c9-a1ab-40d2-9a0a-042493568bee)
-----Query Job SQL Follows-----
| . | . | . | . | . | . | . | . |
1:
2: SELECT a.id, a.body, a.owner_user_id
3: FROM `bigquery-public-data.stackoverflow.posts_questions` as q
4: INNER JOIN `bigquery-public-data.stackoverflow.posts_answers` as a
5: ON q.id = a.parent_id
6: WHERE q.tags LIKE '%bigquery%'
7:
| . | . | . | . | . | . | . | . |
answers_query = """
SELECT a.id, a.body, a.owner_user_id
FROM `bigquery-public-data.stackoverflow.posts_questions` as q
INNER JOIN `bigquery-public-data.stackoverflow.posts_answers` as a
ON q.id = a.parent_id
WHERE q.tags LIKE '%bigquery%'
"""
# Set up the query (cancel the query if it would use too much of
# your quota, with the limit set to 10 GB)
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
answers_query_job = client.query(answers_query, job_config=safe_config)
# API request - run the query, and return a pandas DataFrame
answers_results = answers_query_job.to_dataframe()
# Preview results
print(answers_results.head())
# Check your answer
q_4.check()
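
A minimal sketch (not part of the original gist) of one way to avoid the BadRequest above: dry-run the query first to see how many bytes it would bill, then size maximum_bytes_billed to cover that estimate. It assumes the same client and answers_query defined above; the 30 GB cap is an example value chosen because the error reports that roughly 27 GB would be billed.

from google.cloud import bigquery

# Dry run: estimates bytes processed without running or billing the query.
dry_run_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
dry_run_job = client.query(answers_query, job_config=dry_run_config)
print("This query would process {} bytes.".format(dry_run_job.total_bytes_processed))

# The error above reports ~27 GB billed, so a 10 GB cap fails.
# Raise the cap to cover the estimate (example: 30 GB); adjust to your own quota.
safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=3 * 10**10)
answers_query_job = client.query(answers_query, job_config=safe_config)
answers_results = answers_query_job.to_dataframe()
print(answers_results.head())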
dalinaum commented Apr 29, 2021

In https://www.kaggle.com/dalinaum/exercise-joining-data/ , at 4) Your first join, an error occurs when AS q and AS a are written in lowercase. I'm not sure this makes any sense. Running it exactly as in weird.py raises the error, and changing both as keywords to uppercase AS makes the error go away. weird.
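
A hedged sketch of how the report above could be probed (not from the gist; the names answers_query_upper and answers_query_lower for the two spellings are hypothetical): run both versions under the same 10 GB cap, catch the BadRequest, and check cache_hit on the run that succeeds. cache_hit is a real QueryJob property, but whether query caching actually explains the AS/as difference is only a guess here.

from google.api_core.exceptions import BadRequest
from google.cloud import bigquery

safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)

# answers_query_upper / answers_query_lower are assumed to hold the uppercase-AS
# and lowercase-as versions of the query shown above (hypothetical names).
for label, query in [("uppercase AS", answers_query_upper),
                     ("lowercase as", answers_query_lower)]:
    job = client.query(query, job_config=safe_config)
    try:
        job.to_dataframe()
        print(label, "succeeded, cache_hit =", job.cache_hit)
    except BadRequest as error:
        print(label, "failed:", error)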
