Skip to content

Instantly share code, notes, and snippets.

@saleiva
Created May 6, 2014 16:43
Show Gist options
  • Save saleiva/a176cca1b1384d6d8e39 to your computer and use it in GitHub Desktop.
Save saleiva/a176cca1b1384d6d8e39 to your computer and use it in GitHub Desktop.
Insert fake data on a CartoDB table
import string
import urllib
import urllib2
import math, random
import time
from optparse import OptionParser
account_name = ''
account_api_key = ''
USAGE = """
usage:
simple-write.py -k KEY -a ACCOUNT
arguments:
-k, --key api_key from your CartoDB account
-a, --account name of your CartoDB account
"""
parser = OptionParser()
parser.add_option("-k", "--key", dest="key",
help="api_key from your CartoDB account", metavar="KEY", default=account_api_key)
parser.add_option("-a", "--account", dest="account",
help="name of your CartoDB account", metavar="ACCOUNT", default=account_name)
(options, args) = parser.parse_args()
def insertvalue(lat, lng, measure):
# the_geom column in CartoDB is a projected geometry, CDB_LatLng is a helper
# function in SQL that will allow you to quickly create those from
# latitude and longitude values
the_geom = "CDB_LatLng(%f, %f)" % (lat, lng)
# this is our SQL INSERT statement. You can read more about their format in
# the PostgreSQL documentation, http://www.postgresql.org/docs/9.3/static/dml-insert.html
value = "(%s, %d)" % (the_geom, measure)
return value
def writerequest(account, key, sql):
# The API URL for your account
url = "https://%s.cartodb.com/api/v1/sql" % account
# these are the parameters we need to send with our https request
# api_key is unique to your account, and you should not share it with others
# q is the SQL query we created above
params = {
'api_key' : key,
'q' : sql
}
# create your HTTP request
req = urllib2.Request(url, urllib.urlencode(params))
response = urllib2.urlopen(req)
return response
def fakedata():
# we're going to fake data around to a bullseye
c_lat = 36.175
c_lng = -115.136389
# pick a random angle
r_radians = 2.0 * math.pi * random.random()
# pick a random distance
r_degrees = math.exp(math.log(2) * random.random()) - 1.0
lat = c_lat + (r_degrees * math.cos(r_radians))
lng = c_lng + (r_degrees * math.sin(r_radians))
# pick a random number from 0 to 99
measure = math.floor(random.random() * 37) + 1977
return lat, lng, measure
def batchinsert(account, key, values):
sql = "INSERT INTO elvis_lives (the_geom, year) (VALUES %s);" % ','.join(values)
# do a single insert for all 100 statements
response = writerequest(account, key, sql)
print "%d records complete" % len(values)
def importjob(account, key):
# It is better to do inserts as batch jobs. Around 100-500 insert statements per request
# Is much better than 100-500 individual requests
record_count = 0
total_records = 0
values = []
while record_count < 211:
lat, lng, measure = fakedata()
value = insertvalue(lat, lng, measure)
values.append(value)
record_count = record_count + 1
# here we send a request for each 100 records
if record_count%100 == 0:
batchinsert(account, key, values)
values = []
total_records = total_records + 1
# clean up any remaining records and send request
if len(values) > 0:
batchinsert(account, key, values)
print "%d total records complete" % total_records
def main():
# if options.key == None or options.account == None:
# print USAGE
# else:
importjob(options.account, options.key)
if __name__=="__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment