Last active
June 7, 2016 08:15
-
-
Save tomachalek/2f1622b641d8d318146d8033029df1ef to your computer and use it in GitHub Desktop.
A dynamic attribute function(s) for the Manatee corpus engine
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <string>

/*
How to install:

1) compile the module:

   g++ -Wall -fPIC -DPIC -shared -o ucnkdynfn.so ucnkdynfn.cc

2) update your registry file (DYNAMIC must name the function exported
   by the library, ARG1 is passed to it as the 'pos' argument):

   ATTRIBUTE my_attr {
       LABEL "My Attribute"
       MULTIVALUE yes
       MULTISEP "|"
       DYNAMIC geteachncharbysep
       DYNLIB "/a/path/to/ucnkdynfn.so"
       FUNTYPE i
       ARG1 "0"
       FROMATTR ep_tag
       TYPE index
       TRANSQUERY no
   }

3) install the new attribute for Manatee:

   mkdynattr /a/path/to/the/registry/file my_attr
*/
extern "C" {

/**
 * Picks the character at byte offset `pos` from every '|'-separated item
 * of `s` and joins the picked characters with '|' again.
 *
 * Example: geteachncharbysep("abc|def", 1) yields "b|e".
 *
 * Empty items (leading, trailing or doubled separators) are ignored, and
 * items that are too short to have a character at `pos` contribute nothing
 * to the result. A null `s` or a negative `pos` yields an empty string.
 *
 * @param s    NUL-terminated input value; it is only read, never modified.
 * @param pos  zero-based byte offset of the character to take from each item.
 * @return     pointer to a NUL-terminated result held in a function-local
 *             static buffer. The pointer stays valid until the next call of
 *             this function, which overwrites the buffer; the function is
 *             therefore not reentrant and the caller must not free the result.
 */
const char * geteachncharbysep(char * s, int pos) {
    // The result must outlive this call, so it cannot live in an automatic
    // std::string (returning c_str() of a local would dangle).
    static std::string out;
    out.clear();

    if (!s || pos < 0) {
        return out.c_str();
    }

    const std::string text(s);
    const std::string::size_type idx = static_cast<std::string::size_type>(pos);
    std::string::size_type start = 0;

    for (;;) {
        std::string::size_type end = text.find('|', start);
        if (end == std::string::npos) {
            end = text.size();
        }

        // [start, end) is one item; the bounds check also skips empty items
        // because their length is zero.
        if (idx < end - start) {
            if (!out.empty()) {
                out += '|';
            }
            out += text[start + idx];
        }

        if (end == text.size()) {
            break;
        }
        start = end + 1;
    }

    return out.c_str();
}

}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment