Skip to content

Instantly share code, notes, and snippets.

@ashelly
Last active December 29, 2015 03:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ashelly/7610713 to your computer and use it in GitHub Desktop.
Save ashelly/7610713 to your computer and use it in GitHub Desktop.
Tests that string S is a subsequence of T with no gaps greater than k characters
/*
subsequence_k:
Tests that string S is a subsequence of T with no gaps greater than k characters.
The implementation that uses the idea of a surveyor's chain:
Imagine a series of poles linked by chains of length k.
Place the first pole at the beginning of the string.
Now cary the next pole forward until you find a character match.
Plant that pole. If there is slack, move on to the next character;
else the previous pole has been dragged forward, and you need to go back
and move it to the next nearest match.
Repeat until you reach the end or run out of slack.
This runs on average in something like O(N+km) where m is length(S).
In testing, it averages less than 2N.
( but there is a degenerate case with arguments `N 2k+1 k` )
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
unsigned long COMP = 0;
typedef struct chain_t{
int slack;
int pole;
} chainlink;
#define ANIMATE
#ifdef ANIMATE
void show(char*s, char*t,chainlink* chain, int t_len, int ts, char stage){
int i,j;
t_len+=1;
j=-1;
putchar('\r');
for (i=1;i<=t_len;i++,t++){
for (;j<chain[i].pole;j++){
putchar(' ');
}
putchar(j==chain[i].pole?(s[j]==*t?*t:'|'):'_');
j++;
}
fflush(stdout);
usleep(20000);
}
#define dprintf printf
#else
#define show(a,b,c,d,e,f)
#define dprintf
#endif
int subsequence_k_impl(char* t, char* s, int k, chainlink* link, int len)
{
char* match=s;
int extra = k; //total slack in the chain
chainlink* ch = link-1;
show(t,s,ch,match-s,extra,' ');
//for all chars to match, including final null
while (match<=s+len){
//advance until we find spot for this post or run out of chain
while (++COMP && t[link->pole] && t[link->pole]!=*match ){
// while ( t[link->pole] && t[link->pole]!=*match ){
link->pole++; link->slack--;
if (--extra<0) return 0; //no more slack, can't do it.
show(t,s,ch,match-s,extra,'A');
}
//if we ran out of ground, it's no good
if (t[link->pole] != *match){
return 0;
}
//if this link has slack, go to next pole
if (link->slack>=0) {
link++; match++;
show(t,s,ch,match-s,extra,'S');
//if next pole was already placed,
while (link[-1].pole < link->pole) {
//recalc slack and advance again
extra += link->slack = k-(link->pole-link[-1].pole-1);
link++; match++;
show(t,s,ch,match-s,extra,'P');
}
//if not done
if (match<=s+len){
//currrent pole is out of order (or unplaced), move it next to prev one
link->pole = link[-1].pole+1;
extra+= link->slack = k;
show(t,s,ch,match-s,extra,'J');
}
}
//else drag the previous pole forward to the limit of the chain.
else if (match>=s) {
int drag = (link->pole - link[-1].pole -1)- k;
link--;match--;
link->pole+=drag;
link->slack-=drag;
show(t,s,ch,match-s,extra,'D');
}
}
//all poles planted. good match
return 1;
}
int subsequence_k(char* t, char* s, int k)
{
int l = strlen(s);
dprintf("\n %s\n",t);
if (strlen(t)>(l+1)*(k+1))
return -1; //easy exit
else {
chainlink* chain = calloc(sizeof(chainlink),l+2);
chain[0].pole=-1; //first pole is anchored before the string
chain[0].slack=0;
chain[1].pole=0; //start searching at first char
chain[1].slack=k;
l = subsequence_k_impl(t,s,k,chain+1,l);
l=l?chain[1].pole:-1; //pos of first match or -1;
free(chain);
}
return l;
}
char* makestring(int alphasize,int len, int clustering){
char* buf=malloc(sizeof(char)*(len+1));
buf[len]=0;
while (len ){
int r=rand()%clustering+1;
char c=rand()%(alphasize&0x7f)+65;
while (len && r--) buf[--len]=c;
}
return buf;
}
char* makesubstring(char* source,int n,int maxgap){
int i;//,max = (3*n/(++maxgap));
char* buf=malloc(sizeof(char)*(n));
int j;
if (!maxgap) maxgap++;
j=rand()%(maxgap)+1;
for (i=0;j<n&&i<n;i++){
buf[i]=source[j];
j+=rand()%(maxgap)+1;
}
buf[i]='\0';
return buf;
}
#define N 100
#define K 8
#define A 5
#define R 1
#define TRIALS 20
/* Test harness.
Args are:
[N] [k] [AlphabetSize] [AveRepeat]
defaults 100 8 26 1
or:
-k LongString teststring
*/
int main(int argc, char* argv[])
{
char* T;
int i,result;
int k=K;
int n=N;
int a=A;
int r=R;
printf("N\tm\tk\tA:\tmatch?\tcomps\t(%%)\n");
if (argc>1){
if (*argv[1]=='-'){
char*S = argv[3];
k= atoi(argv[1]+1);
T=argv[2];
n=strlen(T);
result = subsequence_k(T,S,k);
printf("\n%d\t%ld\t%d\t-:\t%c\t%ld\t(%.3f)\n",
n,strlen(S),k,
result>=0?'Y':'N',COMP,(float)COMP/n);
return result;
}
else {
n=atoi(argv[1]);
if (argc>2) k=atoi(argv[2]);
if (argc>3) a=atoi(argv[3]);
if (argc>4) r=atoi(argv[4]);
}
}
srand(time(NULL));
T = makestring(a,n,r);
for (i=0;i<TRIALS/2;i++){
//make matching string
char* S = makesubstring(T,n,k);
COMP=0;
result = subsequence_k(T,S,k);
printf("\n%d\t%ld\t%d\t%d:\t%c\t%ld\t(%.3f)",
n,strlen(S),k,a, result>=0?'Y':'N',COMP,(float)COMP/n);
free(S);
}
for (;i<TRIALS;i++){
char* S = makesubstring(T,N,K*3.0/(rand()%6+1)); //make ones that might not work
COMP=0;
int result = subsequence_k(T,S,K);
printf("\n%d\t%ld\t%d\t%d:\t%c\t%ld\t(%f)",
n,strlen(S),k, a,(result>=0?'Y':'N'),COMP,(float)COMP/n);
free(S);
}
free(T);
printf("\n");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment