@timhberry
Created January 17, 2023 12:56
import csv

from google.cloud import bigquery
from time import sleep
from random import random

client = bigquery.Client()

table_id = "ab-academy-demo.btlearningpath.city_speeds_bq_stream"
filename = "city_daily_speeds-10k.csv"

counter = 0

with open(filename, encoding="ascii", errors="surrogateescape") as data_file:
    reader = csv.reader(data_file)
    # skip the header row of the CSV file
    next(reader)
    for line in reader:
        # prepare each line as a JSON record to match the Avro schema;
        # we don't actually need the schema in our script, unless we want
        # to use a DatumWriter for binary insertion
        rows_to_insert = [{
            "country": line[0],
            "country_code": line[1],
            "region": line[2],
            "region_code": line[3],
            "city": line[4],
            "date": line[5],
            "download_kbps": float(line[6]),
            "upload_kbps": float(line[7]),
            "total_tests": int(line[8]),
            "distance_miles": float(line[9]),
        }]
        counter += 1
        # stream the single-row batch into BigQuery
        errors = client.insert_rows_json(table_id, rows_to_insert, row_ids=[None])
        if errors == []:
            print(counter)
        else:
            print("Encountered errors while inserting rows: {}".format(errors))
        # sleep between 0 and 1 seconds to pace the stream
        sleep(random())
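For reference, below is a minimal, hypothetical sketch of creating the destination table with a schema that matches the row dictionaries streamed above. The field names and types are inferred from the casts in the script, not taken from the actual table definition (the real table may, for example, use DATE for the date column); the project, dataset, and table names are the ones hard-coded in table_id.

from google.cloud import bigquery

client = bigquery.Client()

# Inferred schema: names and types mirror the streamed row dictionaries.
schema = [
    bigquery.SchemaField("country", "STRING"),
    bigquery.SchemaField("country_code", "STRING"),
    bigquery.SchemaField("region", "STRING"),
    bigquery.SchemaField("region_code", "STRING"),
    bigquery.SchemaField("city", "STRING"),
    bigquery.SchemaField("date", "STRING"),  # assumption: may be DATE in the real table
    bigquery.SchemaField("download_kbps", "FLOAT"),
    bigquery.SchemaField("upload_kbps", "FLOAT"),
    bigquery.SchemaField("total_tests", "INTEGER"),
    bigquery.SchemaField("distance_miles", "FLOAT"),
]

table = bigquery.Table("ab-academy-demo.btlearningpath.city_speeds_bq_stream", schema=schema)
client.create_table(table, exists_ok=True)  # no-op if the table already exists

After the streaming script finishes, a quick SELECT COUNT(*) against the table confirms how many rows landed; streamed rows typically become queryable within a few seconds.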