Created
April 1, 2015 17:22
-
-
Save tfoldi/7fc61fc30c23d5937d04 to your computer and use it in GitHub Desktop.
Python example of getting files and parsing them inside postgres database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE OR REPLACE FUNCTION from_url( | |
-- The URL to download. | |
IN url text, | |
-- The maximum number of bytes to download. | |
-- If 0 then the whole response is returned. | |
IN max_bytes integer DEFAULT 0, | |
-- Should any errors (like HTTP transport errors) | |
-- throw an exception or simply return default_response | |
IN should_throw boolean DEFAULT true, | |
-- The default response if any errors are found. | |
-- Only used when should_throw is set to true | |
IN default_response text DEFAULT E'', | |
-- The encoding to use if the content-type provided by the server | |
-- does not provide a meaningful encoding (like text/csv or | |
-- application/octet-stream) | |
IN fallback_encoding text DEFAULT E'iso-8859-1' | |
) | |
RETURNS text | |
AS $$ | |
import urllib2, traceback | |
# Either throws an error or returns the defeault response | |
# depending on the should_throw parameter of the from_url() function | |
def on_error(): | |
if should_throw: | |
plpy.error("Error downloading '%s'\n %s" % (url, traceback.format_exc())) | |
else: | |
return default_response | |
try: | |
response = urllib2.urlopen(url) | |
# sometimes servers return without a charset, handle this case. | |
content_type = response.headers['content-type'].split('charset=') | |
encoding = content_type[-1] if len(content_type) > 1 else fallback_encoding | |
# Limit the read if necessary | |
if max_bytes == 0: return unicode( response.read(), encoding) | |
else: return unicode(response.read(max_bytes), encoding) | |
except: | |
return on_error() | |
$$ LANGUAGE plpythonu VOLATILE; | |
CREATE TYPE site_user AS ( | |
id text, | |
first_name text, | |
last_name text, | |
email text, | |
country text, | |
ip_address text | |
); | |
CREATE OR REPLACE FUNCTION parse_person( | |
IN csv_text text, | |
IN skip_first_line boolean DEFAULT true | |
) | |
RETURNS SETOF site_user | |
AS $$ | |
import csv, traceback | |
try: | |
dialect = csv.Sniffer().sniff(csv_text[:1024]) | |
lines = csv_text.splitlines() | |
return csv.reader( (lines[1:] if skip_first_line else lines), dialect) | |
except: | |
plpy.error("Error parsing csv %s" % traceback.format_exc()) | |
$$ LANGUAGE plpythonu VOLATILE; | |
CREATE TYPE sales_data AS ( | |
varenummer text, | |
import_eksport text, | |
tid text, | |
statistikkvariabel text, | |
Utenrikshandel int | |
); | |
CREATE OR REPLACE FUNCTION parse_sales_data( | |
IN csv_text text, | |
IN skip_first_line boolean DEFAULT true | |
) | |
RETURNS SETOF sales_data | |
AS $$ | |
import csv, traceback | |
try: | |
dialect = csv.Sniffer().sniff(csv_text[:1024]) | |
lines = csv_text.splitlines() | |
return csv.reader( (lines[1:] if skip_first_line else lines), dialect) | |
except: | |
plpy.error("Error parsing csv %s" % traceback.format_exc()) | |
$$ LANGUAGE plpythonu VOLATILE; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment