oelmekki/sentences_distance.sql

## sentences_distance.sql
-- $ createdb test
-- $ psql test


CREATE TABLE sentences( words varchar(255)[] );
INSERT INTO sentences VALUES( ARRAY['hello', 'world', 'how', 'are', 'you', 'doing', 'today'] );
INSERT INTO sentences VALUES( ARRAY['how', 'are', 'you', 'doing', 'today', 'Jim'] );
INSERT INTO sentences VALUES( ARRAY['Maybe', 'this', 'sentence', 'will', 'not', 'be', 'as', 'close'] );

-- extract as function for less typing
CREATE FUNCTION array_varchar_intersect( varchar(255)[], varchar(255)[] )
  RETURNS varchar(255)[]
  language sql
as $FUNCTION$
    SELECT ARRAY(
        SELECT UNNEST( lower( $1::text )::varchar(255)[] )
        INTERSECT
        SELECT UNNEST( lower( $2::text )::varchar(255)[] )
    );
$FUNCTION$;


-- Input is `ARRAY[ 'Are', 'you', 'doing', 'fine', 'jim' ]`

SELECT words, array_varchar_intersect( ARRAY[ 'Are', 'you', 'doing', 'fine', 'jim' ], words ) as matching
FROM sentences
ORDER BY array_length( array_varchar_intersect( ARRAY[ 'Are', 'you', 'doing', 'fine', 'jim' ], words ), 1 ) DESC NULLS LAST;


--                    words                    |      matching
-- --------------------------------------------+---------------------
--  {how,are,you,doing,today,Jim}              | {are,you,jim,doing}
--  {hello,world,how,are,you,doing,today}      | {are,you,doing}
--  {Maybe,this,sentence,will,not,be,as,close} | {}
-- (3 rows)
	-- $ createdb test
	-- $ psql test


	CREATE TABLE sentences( words varchar(255)[] );
	INSERT INTO sentences VALUES( ARRAY['hello', 'world', 'how', 'are', 'you', 'doing', 'today'] );
	INSERT INTO sentences VALUES( ARRAY['how', 'are', 'you', 'doing', 'today', 'Jim'] );
	INSERT INTO sentences VALUES( ARRAY['Maybe', 'this', 'sentence', 'will', 'not', 'be', 'as', 'close'] );

	-- extract as function for less typing
	CREATE FUNCTION array_varchar_intersect( varchar(255)[], varchar(255)[] )
	RETURNS varchar(255)[]
	language sql
	as $FUNCTION$
	SELECT ARRAY(
	SELECT UNNEST( lower( $1::text )::varchar(255)[] )
	INTERSECT
	SELECT UNNEST( lower( $2::text )::varchar(255)[] )
	);
	$FUNCTION$;



	-- Input is `ARRAY[ 'Are', 'you', 'doing', 'fine', 'jim' ]`

	SELECT words, array_varchar_intersect( ARRAY[ 'Are', 'you', 'doing', 'fine', 'jim' ], words ) as matching
	FROM sentences
	ORDER BY array_length( array_varchar_intersect( ARRAY[ 'Are', 'you', 'doing', 'fine', 'jim' ], words ), 1 ) DESC NULLS LAST;



	-- words \| matching
	-- --------------------------------------------+---------------------
	-- {how,are,you,doing,today,Jim} \| {are,you,jim,doing}
	-- {hello,world,how,are,you,doing,today} \| {are,you,doing}
	-- {Maybe,this,sentence,will,not,be,as,close} \| {}
	-- (3 rows)