Skip to content

Instantly share code, notes, and snippets.

@lrvick
Created September 20, 2011 18:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lrvick/1229850 to your computer and use it in GitHub Desktop.
Save lrvick/1229850 to your computer and use it in GitHub Desktop.
Import a remote bzip2-compressed SQLite3 SQL dump into a local SQLite3 database
#The goal is to emulate the following bash line properly in python:
#wget -O - "https://github.com/downloads/Tawlk/synt/sample_data.bz2" | bzcat | sqlite3 sample_data.db
import bz2
import sqlite3
import time
import urllib2
import os
from cStringIO import StringIO
# Open the HTTP stream for the bzip2-compressed SQL dump. Nothing is read
# yet; the body is consumed in fixed-size chunks further down.
response = urllib2.urlopen(
    'https://github.com/downloads/Tawlk/synt/sample_data.bz2'
)

# Size of the download in bytes, taken from the Content-Length header.
total_bytes = int(response.info().getheader('Content-Length').strip())

# Reference point for the elapsed-time / speed figures in the progress output.
start_time = time.time()

# Progress bookkeeping: bytes received so far, plus the two values used to
# throttle how often a progress line is printed.
saved_bytes = last_seconds = last_seconds_start = 0

# Compressed chunks go through the incremental decompressor and the resulting
# SQL text is accumulated in an in-memory buffer.
decompressor = bz2.BZ2Decompressor()
data_buffer = StringIO()
# Locate the data directory under the user's home. Neither the os / os.path
# functions nor sqlite3 expand "~" themselves, so a literal '~/.synt' would be
# treated as a directory named "~" relative to the current working directory.
synt_dir = os.path.expanduser('~/.synt')
db_path = os.path.join(synt_dir, 'samples.db')

if not os.path.isdir(synt_dir):
    os.makedirs(synt_dir)

# Always start from an empty database: drop any file left by an earlier run.
if os.path.exists(db_path):
    os.remove(db_path)

conn = sqlite3.connect(db_path)
# Running total reported by import_progress(). It lives at module level
# because the callback is invoked without arguments and so cannot carry its
# own state.
prcount = 0


def import_progress():
    """Print a running import counter; meant to be a sqlite3 progress handler.

    Each call adds 20 to the module-level ``prcount`` and prints the new
    total. The step of 20 matches the interval used when the handler is
    registered with ``conn.set_progress_handler``.

    NOTE(review): sqlite3 invokes progress handlers every N virtual-machine
    instructions, not every N rows, so the "records" figure in the message
    is only a rough indication of progress.

    Returns:
        0, so that SQLite carries on with the statement being executed
        (a non-zero return value would abort it).
    """
    global prcount
    prcount += 20
    print("Processed %s of ~2000000 records" % (prcount))
    return 0
def _transfer_rate(byte_count, elapsed):
    """Return ``(rate, unit)`` for ``byte_count`` bytes moved in ``elapsed`` seconds.

    The rate is rounded to two decimals and expressed with the 'Kb/s' label
    (bytes / 1024) or, above 1000 of those, the 'Mb/s' label
    (bytes / 1048576). A non-positive ``elapsed`` yields a rate of 0.0
    instead of dividing by zero.
    """
    if elapsed <= 0:
        return 0.0, 'Kb/s'
    per_second = byte_count / float(elapsed)
    rate = round(per_second / 1024, 2)
    if rate > 1000:
        return round(per_second / 1048576, 2), 'Mb/s'
    return rate, 'Kb/s'


# Have SQLite call import_progress() every 20 virtual-machine instructions
# while the import script runs.
conn.set_progress_handler(import_progress, 20)

# Download phase: pull the compressed stream in 8 KiB chunks, decompress each
# chunk incrementally and collect the SQL text in data_buffer.
while True:
    chunk = response.read(8192)
    if not chunk:
        break
    saved_bytes += len(chunk)
    data_buffer.write(decompressor.decompress(chunk))

    seconds = time.time() - start_time
    # No progress output during the first second: the speed estimate is not
    # meaningful yet.
    if seconds > 1:
        percent = round((float(saved_bytes) / total_bytes) * 100, 2)
        # Speed is based on the bytes actually received so far, not on the
        # expected total size.
        speed, speed_type = _transfer_rate(saved_bytes, seconds)
        # Throttle: print a line only once at least 0.5s has accumulated
        # since the previous one; otherwise just update the accumulator.
        if last_seconds >= 0.5:
            last_seconds = 0
            last_seconds_start = time.time()
            print("Downloaded %d of %d Mb, %s%s (%0.2f%%)\r" % (saved_bytes/1048576, total_bytes/1048576, speed, speed_type, percent))
        else:
            last_seconds = (time.time() - last_seconds_start)

response.close()

# Import phase: only run the SQL when the whole file arrived.
if saved_bytes == total_bytes:
    # Recompute the rate here so it is defined even when the download
    # finished in under a second and the loop never reported progress.
    speed, speed_type = _transfer_rate(saved_bytes, time.time() - start_time)
    print("Downloaded %d of %d Mb, %s%s (100%%)\r" % (saved_bytes/1048576, total_bytes/1048576, speed, speed_type))
    try:
        conn.executescript(data_buffer.getvalue())
    except Exception as e:
        # Best effort: report the failure instead of dying with a traceback.
        print("Sqlite3 import failed with: %s" % e)
else:
    # Previously a short download ended silently with an empty database.
    print("Download incomplete: received %d of %d bytes, import skipped" % (saved_bytes, total_bytes))

conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment