tfoldi/pgpydl.sql

## pgpydl.sql
CREATE OR REPLACE FUNCTION from_url(
  -- The URL to download.
  IN url text,
  -- The maximum number of bytes to download.
  -- If 0 then the whole response is returned.
  IN max_bytes integer DEFAULT 0,
  -- Should any errors (like HTTP transport errors)
  -- throw an exception or simply return default_response
  IN should_throw boolean DEFAULT true,
  -- The default response if any errors are found.
  -- Only used when should_throw is set to true
  IN default_response text DEFAULT E'',
  -- The encoding to use if the content-type provided by the server
  -- does not provide a meaningful encoding (like text/csv or
  -- application/octet-stream)
  IN fallback_encoding text DEFAULT E'iso-8859-1'
)
  RETURNS text
AS $$
  import urllib2, traceback

  # Either throws an error or returns the defeault response
  # depending on the should_throw parameter of the from_url() function
  def on_error():
    if should_throw:
      plpy.error("Error downloading '%s'\n %s" % (url, traceback.format_exc()))
    else:
      return default_response


  try:
    response = urllib2.urlopen(url)

    # sometimes servers return without a charset, handle this case.
    content_type = response.headers['content-type'].split('charset=')
    encoding = content_type[-1] if len(content_type) > 1 else fallback_encoding

    # Limit the read if necessary
    if max_bytes == 0: return unicode( response.read(), encoding)
    else: return unicode(response.read(max_bytes), encoding)
  except:
    return on_error()

$$ LANGUAGE plpythonu VOLATILE;


CREATE TYPE site_user AS (
    id text,
    first_name text,
    last_name text,
    email text,
    country text,
    ip_address text
  );


CREATE OR REPLACE FUNCTION parse_person(
  IN csv_text text,
  IN skip_first_line boolean DEFAULT true
)
  RETURNS SETOF site_user
AS $$
  import csv, traceback

  try:
    dialect = csv.Sniffer().sniff(csv_text[:1024])
    lines = csv_text.splitlines()
    return csv.reader( (lines[1:] if skip_first_line else lines), dialect)

  except:
    plpy.error("Error parsing csv %s" % traceback.format_exc())

$$ LANGUAGE plpythonu VOLATILE;


CREATE TYPE sales_data AS (
  varenummer text,
  import_eksport text,
  tid text,
  statistikkvariabel text,
  Utenrikshandel int
);


CREATE OR REPLACE FUNCTION parse_sales_data(
  IN csv_text text,
  IN skip_first_line boolean DEFAULT true
)
  RETURNS SETOF sales_data
AS $$
  import csv, traceback

  try:
    dialect = csv.Sniffer().sniff(csv_text[:1024])
    lines = csv_text.splitlines()
    return csv.reader( (lines[1:] if skip_first_line else lines), dialect)

  except:
    plpy.error("Error parsing csv %s" % traceback.format_exc())

$$ LANGUAGE plpythonu VOLATILE;
	CREATE OR REPLACE FUNCTION from_url(
	-- The URL to download.
	IN url text,
	-- The maximum number of bytes to download.
	-- If 0 then the whole response is returned.
	IN max_bytes integer DEFAULT 0,
	-- Should any errors (like HTTP transport errors)
	-- throw an exception or simply return default_response
	IN should_throw boolean DEFAULT true,
	-- The default response if any errors are found.
	-- Only used when should_throw is set to true
	IN default_response text DEFAULT E'',
	-- The encoding to use if the content-type provided by the server
	-- does not provide a meaningful encoding (like text/csv or
	-- application/octet-stream)
	IN fallback_encoding text DEFAULT E'iso-8859-1'
	)
	RETURNS text
	AS $$
	import urllib2, traceback

	# Either throws an error or returns the defeault response
	# depending on the should_throw parameter of the from_url() function
	def on_error():
	if should_throw:
	plpy.error("Error downloading '%s'\n %s" % (url, traceback.format_exc()))
	else:
	return default_response


	try:
	response = urllib2.urlopen(url)

	# sometimes servers return without a charset, handle this case.
	content_type = response.headers['content-type'].split('charset=')
	encoding = content_type[-1] if len(content_type) > 1 else fallback_encoding

	# Limit the read if necessary
	if max_bytes == 0: return unicode( response.read(), encoding)
	else: return unicode(response.read(max_bytes), encoding)
	except:
	return on_error()

	$$ LANGUAGE plpythonu VOLATILE;



	CREATE TYPE site_user AS (
	id text,
	first_name text,
	last_name text,
	email text,
	country text,
	ip_address text
	);


	CREATE OR REPLACE FUNCTION parse_person(
	IN csv_text text,
	IN skip_first_line boolean DEFAULT true
	)
	RETURNS SETOF site_user
	AS $$
	import csv, traceback

	try:
	dialect = csv.Sniffer().sniff(csv_text[:1024])
	lines = csv_text.splitlines()
	return csv.reader( (lines[1:] if skip_first_line else lines), dialect)

	except:
	plpy.error("Error parsing csv %s" % traceback.format_exc())

	$$ LANGUAGE plpythonu VOLATILE;





	CREATE TYPE sales_data AS (
	varenummer text,
	import_eksport text,
	tid text,
	statistikkvariabel text,
	Utenrikshandel int
	);


	CREATE OR REPLACE FUNCTION parse_sales_data(
	IN csv_text text,
	IN skip_first_line boolean DEFAULT true
	)
	RETURNS SETOF sales_data
	AS $$
	import csv, traceback

	try:
	dialect = csv.Sniffer().sniff(csv_text[:1024])
	lines = csv_text.splitlines()
	return csv.reader( (lines[1:] if skip_first_line else lines), dialect)

	except:
	plpy.error("Error parsing csv %s" % traceback.format_exc())

	$$ LANGUAGE plpythonu VOLATILE;