Skip to content

Instantly share code, notes, and snippets.

@parthsarthiprasad
Created March 24, 2020 22:19
Show Gist options
  • Save parthsarthiprasad/48e8e22de534df24bd46fd8bea9c4971 to your computer and use it in GitHub Desktop.
Save parthsarthiprasad/48e8e22de534df24bd46fd8bea9c4971 to your computer and use it in GitHub Desktop.
inline proc findAll(needle: string, region: range(?) = 1:byteIndex..): []
{
/*
find all occurrences of needle in string.
return type :- integer array containing starting indices of matched pattern. (1-based indexing)
empty array if no index found
Algorithm :- boyle bad character shift array with boyle moore horspool algorithm.
preprocessing phase is O(m+k) time and
O(k) space complexity
where m = length of pattern ,
n = length of string
k = numchar = 256
worst-case performance O(mn),
average time is O(n)
best case = sub-linear
*/
use List;
var numchar = 256 : int;
var res:list(byteIndex);
var rlow:byteIndex;
var rhigh:byteIndex;
//handle region
if(!(region.hasLowBound())) then rlow = 1; else rlow=region.low;
if(!(region.hasHighBound())) then rhigh = this.len; else rhigh=region.high;
var nlen = needle.len;
//handle corner cases
if((nlen>(rhigh-rlow+1)) || (nlen==0) || (this.len==0)){
var x = res.toArray();
var finres:[0..res.size-1] int;
finres = x.reindex(0..#x.size):int;
return finres;
}
//preprocess the needle
//preprocess BoyleMoore bad character shift.
//preBmBs(needle ,rlow, rhigh, bmbc);
// boyle moore bad character array to store the preprocess of the input needle
var bmbc:[1..numchar] int;
for i in 1..numchar
{
bmbc[i]=nlen;
}
for i in 1..(nlen-1)
{
bmbc[needle[i]]=nlen-i+1;
}
//find all occurrences using BMH search
//var i = rlow;
var j = rlow;
while(j<=rhigh-nlen){
var c = this[j+nlen-1] : byteIndex;
if(needle[nlen]==c && !(needle == this[j..j+nlen])){
res.append(j);
}
j += bmbc[c];
}
var x = res.toArray();
var finres: [0..res.size-1] int;
finres = x.reindex(0..#x.size):int;
return finres;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment