Last active
December 29, 2015 03:49
-
-
Save ashelly/7610713 to your computer and use it in GitHub Desktop.
Tests that string S is a subsequence of T with no gaps greater than k characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
subsequence_k: | |
Tests that string S is a subsequence of T with no gaps greater than k characters. | |
The implementation that uses the idea of a surveyor's chain: | |
Imagine a series of poles linked by chains of length k. | |
Place the first pole at the beginning of the string. | |
Now cary the next pole forward until you find a character match. | |
Plant that pole. If there is slack, move on to the next character; | |
else the previous pole has been dragged forward, and you need to go back | |
and move it to the next nearest match. | |
Repeat until you reach the end or run out of slack. | |
This runs on average in something like O(N+km) where m is length(S). | |
In testing, it averages less than 2N. | |
( but there is a degenerate case with arguments `N 2k+1 k` ) | |
*/ | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <string.h> | |
unsigned long COMP = 0; | |
typedef struct chain_t{ | |
int slack; | |
int pole; | |
} chainlink; | |
#define ANIMATE | |
#ifdef ANIMATE | |
void show(char*s, char*t,chainlink* chain, int t_len, int ts, char stage){ | |
int i,j; | |
t_len+=1; | |
j=-1; | |
putchar('\r'); | |
for (i=1;i<=t_len;i++,t++){ | |
for (;j<chain[i].pole;j++){ | |
putchar(' '); | |
} | |
putchar(j==chain[i].pole?(s[j]==*t?*t:'|'):'_'); | |
j++; | |
} | |
fflush(stdout); | |
usleep(20000); | |
} | |
#define dprintf printf | |
#else | |
#define show(a,b,c,d,e,f) | |
#define dprintf | |
#endif | |
int subsequence_k_impl(char* t, char* s, int k, chainlink* link, int len) | |
{ | |
char* match=s; | |
int extra = k; //total slack in the chain | |
chainlink* ch = link-1; | |
show(t,s,ch,match-s,extra,' '); | |
//for all chars to match, including final null | |
while (match<=s+len){ | |
//advance until we find spot for this post or run out of chain | |
while (++COMP && t[link->pole] && t[link->pole]!=*match ){ | |
// while ( t[link->pole] && t[link->pole]!=*match ){ | |
link->pole++; link->slack--; | |
if (--extra<0) return 0; //no more slack, can't do it. | |
show(t,s,ch,match-s,extra,'A'); | |
} | |
//if we ran out of ground, it's no good | |
if (t[link->pole] != *match){ | |
return 0; | |
} | |
//if this link has slack, go to next pole | |
if (link->slack>=0) { | |
link++; match++; | |
show(t,s,ch,match-s,extra,'S'); | |
//if next pole was already placed, | |
while (link[-1].pole < link->pole) { | |
//recalc slack and advance again | |
extra += link->slack = k-(link->pole-link[-1].pole-1); | |
link++; match++; | |
show(t,s,ch,match-s,extra,'P'); | |
} | |
//if not done | |
if (match<=s+len){ | |
//currrent pole is out of order (or unplaced), move it next to prev one | |
link->pole = link[-1].pole+1; | |
extra+= link->slack = k; | |
show(t,s,ch,match-s,extra,'J'); | |
} | |
} | |
//else drag the previous pole forward to the limit of the chain. | |
else if (match>=s) { | |
int drag = (link->pole - link[-1].pole -1)- k; | |
link--;match--; | |
link->pole+=drag; | |
link->slack-=drag; | |
show(t,s,ch,match-s,extra,'D'); | |
} | |
} | |
//all poles planted. good match | |
return 1; | |
} | |
int subsequence_k(char* t, char* s, int k) | |
{ | |
int l = strlen(s); | |
dprintf("\n %s\n",t); | |
if (strlen(t)>(l+1)*(k+1)) | |
return -1; //easy exit | |
else { | |
chainlink* chain = calloc(sizeof(chainlink),l+2); | |
chain[0].pole=-1; //first pole is anchored before the string | |
chain[0].slack=0; | |
chain[1].pole=0; //start searching at first char | |
chain[1].slack=k; | |
l = subsequence_k_impl(t,s,k,chain+1,l); | |
l=l?chain[1].pole:-1; //pos of first match or -1; | |
free(chain); | |
} | |
return l; | |
} | |
char* makestring(int alphasize,int len, int clustering){ | |
char* buf=malloc(sizeof(char)*(len+1)); | |
buf[len]=0; | |
while (len ){ | |
int r=rand()%clustering+1; | |
char c=rand()%(alphasize&0x7f)+65; | |
while (len && r--) buf[--len]=c; | |
} | |
return buf; | |
} | |
char* makesubstring(char* source,int n,int maxgap){ | |
int i;//,max = (3*n/(++maxgap)); | |
char* buf=malloc(sizeof(char)*(n)); | |
int j; | |
if (!maxgap) maxgap++; | |
j=rand()%(maxgap)+1; | |
for (i=0;j<n&&i<n;i++){ | |
buf[i]=source[j]; | |
j+=rand()%(maxgap)+1; | |
} | |
buf[i]='\0'; | |
return buf; | |
} | |
#define N 100 | |
#define K 8 | |
#define A 5 | |
#define R 1 | |
#define TRIALS 20 | |
/* Test harness. | |
Args are: | |
[N] [k] [AlphabetSize] [AveRepeat] | |
defaults 100 8 26 1 | |
or: | |
-k LongString teststring | |
*/ | |
int main(int argc, char* argv[]) | |
{ | |
char* T; | |
int i,result; | |
int k=K; | |
int n=N; | |
int a=A; | |
int r=R; | |
printf("N\tm\tk\tA:\tmatch?\tcomps\t(%%)\n"); | |
if (argc>1){ | |
if (*argv[1]=='-'){ | |
char*S = argv[3]; | |
k= atoi(argv[1]+1); | |
T=argv[2]; | |
n=strlen(T); | |
result = subsequence_k(T,S,k); | |
printf("\n%d\t%ld\t%d\t-:\t%c\t%ld\t(%.3f)\n", | |
n,strlen(S),k, | |
result>=0?'Y':'N',COMP,(float)COMP/n); | |
return result; | |
} | |
else { | |
n=atoi(argv[1]); | |
if (argc>2) k=atoi(argv[2]); | |
if (argc>3) a=atoi(argv[3]); | |
if (argc>4) r=atoi(argv[4]); | |
} | |
} | |
srand(time(NULL)); | |
T = makestring(a,n,r); | |
for (i=0;i<TRIALS/2;i++){ | |
//make matching string | |
char* S = makesubstring(T,n,k); | |
COMP=0; | |
result = subsequence_k(T,S,k); | |
printf("\n%d\t%ld\t%d\t%d:\t%c\t%ld\t(%.3f)", | |
n,strlen(S),k,a, result>=0?'Y':'N',COMP,(float)COMP/n); | |
free(S); | |
} | |
for (;i<TRIALS;i++){ | |
char* S = makesubstring(T,N,K*3.0/(rand()%6+1)); //make ones that might not work | |
COMP=0; | |
int result = subsequence_k(T,S,K); | |
printf("\n%d\t%ld\t%d\t%d:\t%c\t%ld\t(%f)", | |
n,strlen(S),k, a,(result>=0?'Y':'N'),COMP,(float)COMP/n); | |
free(S); | |
} | |
free(T); | |
printf("\n"); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment