Skip to content

Instantly share code, notes, and snippets.

@NielsLiisberg
Created February 5, 2020 13:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save NielsLiisberg/7881bd320eb281b566dab44c7ceeb50e to your computer and use it in GitHub Desktop.
Save NielsLiisberg/7881bd320eb281b566dab44c7ceeb50e to your computer and use it in GitHub Desktop.
SQL implementation of the ”Longest common subsequence” algorithm
-- This implements the ”Longest common subsequence” algorithm by
-- loading two source physical file members into arrays. This
-- showcases the use of arrays in SQL but also the performance.
-- I.e. 1000 lines of code will compare up to 1.000.000 times,
-- hoewever, this runs quite fast because of the feature
-- to use memory in memory to do the magic. The LCS algorithm is i.e. used
-- in GIT for tracking changes.
--
-- Read more here:
-- https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
-------------------------------------------------------------------
-- qgpl is just used here. Use your own library
cl:addlible qgpl;
create type qgpl.lcsSrcArr as char(200) array[];
create type qgpl.lcsSmallintArr as smallint array[];
create or replace procedure qgpl.Longest_common_subsequence ()
no external action
set option dbgview = *source, commit=*none
begin
declare arrNew lcsSrcArr;
declare arrOld lcssrcArr;
declare arrLenNew int;
declare arrLenOld int;
declare relNew lcsSmallintArr;
declare relOld lcsSmallintArr;
declare ixNew int;
declare ixOld int;
declare ixCntNew int;
declare ixCntOld int;
declare cnt int;
declare topOld int;
declare topNew int;
declare maxcnt int default 0;
declare i int;
declare j int;
-- load the source files into the dynamic arrays
set arrNew = (select array_agg(srcdta) from xnew );
set arrOld = (select array_agg(srcdta) from xold );
-- Get the length of each array
set arrLenNew = cardinality(arrNew);
set arrLenOld = cardinality(arrOld);
-- Initialization is required, since random update
-- is unfortunately not allowed in dynamic arrays
set i =1;
while i <= arrLenNew do
set relNew [i] = null;
set i = i + 1;
end while;
set i =1;
while i <= arrLenOld do
set relOld [i] = null;
set i = i + 1;
end while;
-- Keep on until all sequences are found
-- and sorted with longest sequence first
repeat
set ixNew = 1;
set maxcnt =0;
while ixNew <= arrLenNew do
if relNew[ixNew] is null then
set ixOld =1;
while ixOld <= arrLenOld do
if relOld[ixOld] is null and arrNew[ixNew] = arrOld[ixOld] then
set cnt = 0;
repeat
set cnt = cnt + 1;
set ixNew = ixNew +1;
set ixOld = ixOld +1;
until ixNew > arrLenNew or ixOld > arrLenOld or arrNew[ixNew] <> arrOld[ixOld]
end repeat;
if cnt > maxcnt then
set maxcnt = cnt;
set topNew = ixNew - cnt;
set topOld = ixOld - cnt;
--insert into trace (values('max ' || maxcnt));
end if;
set ixNew = ixNew -1;
set ixOld = ixOld -1;
end if;
set ixOld = ixOld +1;
end while;
end if;
set ixNew = ixNew +1;
end while;
-- flag all compared values, so we dont test/usem them again
set i = 0;
while i < maxcnt do
set relNew [topNew + i] = topOld + i;
set relOld [topOld + i] = topNew + i;
set i = i + 1;
end while;
--insert into trace (text) select 'new' || t.n from unnest (relNew) as T(n);
--insert into trace (text) select 'old' || t.n from unnest (relOld) as T(n);
until maxcnt =0
end repeat;
-- This the result
insert into trace (text) select t.n from unnest (relNew) as T(n);
end;
-- Build test case:
-- Swap the sequence of a,b,c and d,e and insert an X in between
cl: crtsrcpf file(qtemp/xold) mbr(*file);
insert into xold (srcdta) values ('a'), ('b'), ('c'), ('d'), ('e');
cl:crtsrcpf file(qtemp/xnew) mbr(*file);
insert into xnew (srcdta) values ('d'), ('e'),('X'),('a'), ('b'), ('c');
-- Simple trace file
create or replace table qgpl.trace (
text varchar(4096)
);
-- Run the test
call qgpl.Longest_common_subsequence();
-- Show the result:
-- This is the rownumbers in "old" where the exists in "new"
select * from trace;
-- reset test:
cl: dltf qgpl/trace;
cl: dltf qtemp/xold;
cl: dltf qtemp/xnew;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment