Skip to content

Instantly share code, notes, and snippets.

@tfoldi
Created April 1, 2015 17:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tfoldi/7fc61fc30c23d5937d04 to your computer and use it in GitHub Desktop.
Save tfoldi/7fc61fc30c23d5937d04 to your computer and use it in GitHub Desktop.
Python example of getting files and parsing them inside postgres database
CREATE OR REPLACE FUNCTION from_url(
-- The URL to download.
IN url text,
-- The maximum number of bytes to download.
-- If 0 then the whole response is returned.
IN max_bytes integer DEFAULT 0,
-- Should any errors (like HTTP transport errors)
-- throw an exception or simply return default_response
IN should_throw boolean DEFAULT true,
-- The default response if any errors are found.
-- Only used when should_throw is set to true
IN default_response text DEFAULT E'',
-- The encoding to use if the content-type provided by the server
-- does not provide a meaningful encoding (like text/csv or
-- application/octet-stream)
IN fallback_encoding text DEFAULT E'iso-8859-1'
)
RETURNS text
AS $$
import urllib2, traceback
# Either throws an error or returns the defeault response
# depending on the should_throw parameter of the from_url() function
def on_error():
if should_throw:
plpy.error("Error downloading '%s'\n %s" % (url, traceback.format_exc()))
else:
return default_response
try:
response = urllib2.urlopen(url)
# sometimes servers return without a charset, handle this case.
content_type = response.headers['content-type'].split('charset=')
encoding = content_type[-1] if len(content_type) > 1 else fallback_encoding
# Limit the read if necessary
if max_bytes == 0: return unicode( response.read(), encoding)
else: return unicode(response.read(max_bytes), encoding)
except:
return on_error()
$$ LANGUAGE plpythonu VOLATILE;
CREATE TYPE site_user AS (
id text,
first_name text,
last_name text,
email text,
country text,
ip_address text
);
CREATE OR REPLACE FUNCTION parse_person(
IN csv_text text,
IN skip_first_line boolean DEFAULT true
)
RETURNS SETOF site_user
AS $$
import csv, traceback
try:
dialect = csv.Sniffer().sniff(csv_text[:1024])
lines = csv_text.splitlines()
return csv.reader( (lines[1:] if skip_first_line else lines), dialect)
except:
plpy.error("Error parsing csv %s" % traceback.format_exc())
$$ LANGUAGE plpythonu VOLATILE;
CREATE TYPE sales_data AS (
varenummer text,
import_eksport text,
tid text,
statistikkvariabel text,
Utenrikshandel int
);
CREATE OR REPLACE FUNCTION parse_sales_data(
IN csv_text text,
IN skip_first_line boolean DEFAULT true
)
RETURNS SETOF sales_data
AS $$
import csv, traceback
try:
dialect = csv.Sniffer().sniff(csv_text[:1024])
lines = csv_text.splitlines()
return csv.reader( (lines[1:] if skip_first_line else lines), dialect)
except:
plpy.error("Error parsing csv %s" % traceback.format_exc())
$$ LANGUAGE plpythonu VOLATILE;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment