Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save akkomar/439f55c3ce11428a59241ad39068feb6 to your computer and use it in GitHub Desktop.
Save akkomar/439f55c3ce11428a59241ad39068feb6 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{"cells": [{"cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": "someDF = spark.createDataFrame([\n (8, \"bat\"),\n (64, \"mouse\"),\n (-27, \"horse\")\n], [\"number\", \"word\"])"}, {"cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": "+------+-----+\n|number| word|\n+------+-----+\n| 8| bat|\n| 64|mouse|\n| -27|horse|\n+------+-----+\n\n"}], "source": "someDF.write \\\n .format(\"bigquery\") \\\n .option(\"table\",\"moz-fx-data-derived-datasets.analysis.tmp_write_test\") \\\n .option(\"temporaryGcsBucket\",\"spark-bigquery-dev-test\") \\\n .save()\n\n# temporaryGcsBucket note:\n# Create a GCS bucket for use with your Dataproc clusters. Since bucket names share a single global namespace,\n# prefix your bucket name with the project ID, e.g.:\n# gsutil mb -p moz-fx-data-bq-data-science gs://moz-fx-data-bq-data-science-YOURNAME-bucket"}, {"cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": "Row((8, 'bat'), {'number': 0, 'word': 1})\nRow((64, 'mouse'), {'number': 0, 'word': 1})\nRow((-27, 'horse'), {'number': 0, 'word': 1})\n"}], "source": "from google.cloud import bigquery\nclient = bigquery.Client()\n\nquery = (\n \"SELECT * FROM `moz-fx-data-derived-datasets.analysis.tmp_write_test`\"\n)\nquery_job = client.query(\n query,\n # Location must match that of the dataset(s) referenced in the query.\n location=\"US\",\n) # API request - starts the query\n\nfor row in query_job: # API request - fetches results\n # Row values can be accessed by field name or index\n# assert row[0] == row.number == row[\"number\"]\n print(row)"}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ""}], "metadata": {"kernelspec": {"display_name": "PySpark", "language": "python", "name": "pyspark"}, 
"language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9"}}, "nbformat": 4, "nbformat_minor": 2}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment