sthum/psql_text_similaritry_operator.sql

## psql_text_similaritry_operator.sql
--- If you are missing the pypythonu extension, install it first
--- (Ubuntu): sudo apt-get install python-psycopg2

--- Activate plpython
CREATE EXTENSION plpythonu;

--- Create the profiling function
CREATE OR REPLACE FUNCTION similarity(text, text) RETURNS numeric AS $$
import difflib
return difflib.SequenceMatcher(None,args[0], args[1]).ratio()
$$ LANGUAGE 'plpythonu' IMMUTABLE STRICT;

--- Optional: Create an operator for this function, in this case '<#>'
--- Otherwise use it like 'similarity(name1,name2)' in your query
CREATE OPERATOR <#>
(PROCEDURE=similarity,
LEFTARG=text,
RIGHTARG=text);

--- Values go from 0.0 to 1.0
--- Execute some example queries to get a hang for the values and their actually meaning of similarity.
--- Example: SELECT * FROM employees e where similarity('steffen',e.name) > 0.5
--- Queries for Employee Entries with a name similar to 'steffen', like 'stefan' etc...
--- I'd consider > 0.5 as 'very similar'
	--- If you are missing the pypythonu extension, install it first
	--- (Ubuntu): sudo apt-get install python-psycopg2

	--- Activate plpython
	CREATE EXTENSION plpythonu;

	--- Create the profiling function
	CREATE OR REPLACE FUNCTION similarity(text, text) RETURNS numeric AS $$
	import difflib
	return difflib.SequenceMatcher(None,args[0], args[1]).ratio()
	$$ LANGUAGE 'plpythonu' IMMUTABLE STRICT;

	--- Optional: Create an operator for this function, in this case '<#>'
	--- Otherwise use it like 'similarity(name1,name2)' in your query
	CREATE OPERATOR <#>
	(PROCEDURE=similarity,
	LEFTARG=text,
	RIGHTARG=text);

	--- Values go from 0.0 to 1.0
	--- Execute some example queries to get a hang for the values and their actually meaning of similarity.
	--- Example: SELECT * FROM employees e where similarity('steffen',e.name) > 0.5
	--- Queries for Employee Entries with a name similar to 'steffen', like 'stefan' etc...
	--- I'd consider > 0.5 as 'very similar'