Last active
December 13, 2015 22:08
-
-
Save jimregan/4982103 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Index: lttoolbox/lt-comp.1 | |
=================================================================== | |
--- lttoolbox/lt-comp.1 (revision 42693) | |
+++ lttoolbox/lt-comp.1 (working copy) | |
@@ -10,10 +10,26 @@ | |
.SH SYNOPSIS | |
.B lt-comp | |
[ | |
+.B \-a \fIvalue\fR | | |
+.B \-v \fIvalue\fR | | |
+.B \-h | |
+] | |
+[ | |
.B lr \fR| | |
.B rl | |
] dictionary_file output_file | |
.PP | |
+.B lt-comp | |
+[ | |
+.B \-\-alt \fIvalue\fR | | |
+.B \-\-var \fIvalue\fR | | |
+.B \-\-help | |
+] | |
+[ | |
+.B lr \fR| | |
+.B rl | |
+] dictionary_file output_file | |
+.PP | |
.SH DESCRIPTION | |
.BR lt-comp | |
Is the application responsible of compiling dictionaries used by | |
@@ -23,6 +39,15 @@ | |
.PP | |
.SH OPTIONS | |
.TP | |
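+.B \-a \fIvalue\fB, \-\-alt \fIvalue\fR | |
+Sets the value of the \fIalt\fR attribute to use in compilation; entries whose \fIalt\fR attribute has a different value are omitted. | |
+.TP | |
+.B \-v \fIvalue\fB, \-\-var \fIvalue\fR | |
+Sets the value of the \fIv\fR (language variant) attribute to use in compilation; in right-to-left compilation, entries whose \fIv\fR attribute has a different value are omitted. | |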
+.TP | |
+.B \-h, \-\-help | |
+Prints a short help message | |
+.TP | |
.B lr | |
The resulting transducer will process dictionary entries | |
\fIleft-to-right\fR. | |
Index: lttoolbox/lt-expand.1 | |
=================================================================== | |
--- lttoolbox/lt-expand.1 (revision 42693) | |
+++ lttoolbox/lt-expand.1 (working copy) | |
@@ -9,17 +9,41 @@ | |
architecture: \fBhttp://www.apertium.org\fR. | |
.SH SYNOPSIS | |
.B lt-expand | |
+[ | |
+.B \-a \fIvalue\fR | | |
+.B \-v \fIvalue\fR | | |
+.B \-h | |
+] | |
dictionary_file [output_file] | |
.PP | |
+.B lt-expand | |
+[ | |
+.B \-\-alt \fIvalue\fR | | |
+.B \-\-var \fIvalue\fR | | |
+.B \-\-help | |
+] | |
+dictionary_file [output_file] | |
+.PP | |
.SH DESCRIPTION | |
.BR lt-expand | |
-Is the application responsible of expanding a dictionary into a | |
+Is the application responsible for expanding a dictionary into a | |
simple list of input string-output string pairs by eliminating | |
paradigms through substitution and unfolding. | |
.PP | |
The output goes to \fIoutput_file\fR if it is present or to standard | |
output if it is missing. | |
.PP | |
+.SH OPTIONS | |
+.TP | |
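+.B \-a \fIvalue\fB, \-\-alt \fIvalue\fR | |
+Sets the value of the \fIalt\fR attribute to use in expansion; entries whose \fIalt\fR attribute has a different value are omitted. | |
+.TP | |
+.B \-v \fIvalue\fB, \-\-var \fIvalue\fR | |
+Sets the value of the \fIv\fR (language variant) attribute to use in expansion; entries whose \fIv\fR attribute has a different value are expanded as left-to-right only, or omitted if they are already restricted to right-to-left. | |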
+.TP | |
+.B \-h, \-\-help | |
+Prints a short help message | |
+.PP | |
.SH FILES | |
.B dictionary_file | |
The input dictionary to expand. | |
Index: lttoolbox/compiler.cc | |
=================================================================== | |
--- lttoolbox/compiler.cc (revision 42693) | |
+++ lttoolbox/compiler.cc (working copy) | |
@@ -56,6 +56,8 @@ | |
wstring const Compiler::COMPILER_LEMMA_ATTR = L"lm"; | |
wstring const Compiler::COMPILER_IGNORE_ATTR = L"i"; | |
wstring const Compiler::COMPILER_IGNORE_YES_VAL = L"yes"; | |
+wstring const Compiler::COMPILER_ALT_ATTR = L"alt"; | |
+wstring const Compiler::COMPILER_V_ATTR = L"v"; | |
Compiler::Compiler() | |
{ | |
@@ -639,9 +641,14 @@ | |
{ | |
wstring atributo=this->attrib(COMPILER_RESTRICTION_ATTR); | |
wstring ignore = this->attrib(COMPILER_IGNORE_ATTR); | |
+ wstring altval = this->attrib(COMPILER_ALT_ATTR); | |
+ wstring varval = this->attrib(COMPILER_V_ATTR); | |
// if entry is masked by a restriction of direction or an ignore mark | |
- if((atributo != L"" && atributo != direction) || ignore == COMPILER_IGNORE_YES_VAL) | |
+ if((atributo != L"" && atributo != direction) | |
+ || ignore == COMPILER_IGNORE_YES_VAL | |
+ || (altval != L"" && altval != alt) // entry is tagged with an alt value other than the selected one | |
+ || (direction == COMPILER_RESTRICTION_RL_VAL && varval != L"" && varval != variant)) // RL only: entry is tagged with a variant other than the selected one | |
{ | |
// parse to the end of the entry | |
wstring name = L""; | |
@@ -852,3 +859,15 @@ | |
it->second.write(output); | |
} | |
} | |
+ | |
+void | |
+Compiler::setAltValue(string const &a) | |
+{ | |
+ alt = XMLParseUtil::stows(a); | |
+} | |
+ | |
+void | |
+Compiler::setVariantValue(string const &v) | |
+{ | |
+ variant = XMLParseUtil::stows(v); | |
+} | |
Index: lttoolbox/dix.rng | |
=================================================================== | |
--- lttoolbox/dix.rng (revision 42693) | |
+++ lttoolbox/dix.rng (working copy) | |
@@ -159,6 +159,12 @@ | |
<optional> | |
<attribute name="srl"/> | |
</optional> | |
+ <optional> | |
+ <attribute name="alt"/> | |
+ </optional> | |
+ <optional> | |
+ <attribute name="v"/> | |
+ </optional> | |
</define> | |
<!-- | |
r: restriction LR: left-to-right, | |
@@ -170,6 +176,8 @@ | |
<!-- i: ignore ('yes') means ignore, otherwise it is not ignored) --> | |
<!-- slr: translation sense when translating from left to right --> | |
<!-- srl: translation sense when translating from right to left --> | |
+ <!-- alt: alternative entries are omitted if not selected --> | |
+ <!-- v: variant sets direction restrictions based on language variant --> | |
<define name="par"> | |
<element name="par"> | |
<ref name="attlist.par"/> | |
Index: lttoolbox/expander.cc | |
=================================================================== | |
--- lttoolbox/expander.cc (revision 42693) | |
+++ lttoolbox/expander.cc (working copy) | |
@@ -295,9 +295,13 @@ | |
{ | |
wstring atributo=this->attrib(Compiler::COMPILER_RESTRICTION_ATTR); | |
wstring entrname=this->attrib(Compiler::COMPILER_LEMMA_ATTR); | |
+ wstring altval = this->attrib(Compiler::COMPILER_ALT_ATTR); | |
+ wstring varval = this->attrib(Compiler::COMPILER_V_ATTR); | |
wstring myname = L""; | |
- if(this->attrib(Compiler::COMPILER_IGNORE_ATTR) == L"yes") | |
+ if(this->attrib(Compiler::COMPILER_IGNORE_ATTR) == L"yes" | |
+ || (altval != L"" && altval != alt) // entry is tagged with an alt value other than the selected one | |
+ || (varval != L"" && varval != variant && atributo == Compiler::COMPILER_RESTRICTION_RL_VAL)) // RL-restricted entry tagged with another variant | |
{ | |
do | |
{ | |
@@ -316,7 +320,7 @@ | |
} | |
EntList items, items_lr, items_rl; | |
- if(atributo == Compiler::COMPILER_RESTRICTION_LR_VAL) | |
+ if(atributo == Compiler::COMPILER_RESTRICTION_LR_VAL || (varval != L"" && varval != variant && atributo != Compiler::COMPILER_RESTRICTION_RL_VAL)) | |
{ | |
items_lr.push_back(pair<wstring, wstring>(L"", L"")); | |
} | |
@@ -594,3 +598,15 @@ | |
it->second.append(endings.second); | |
} | |
} | |
+ | |
+void | |
+Expander::setAltValue(string const &a) | |
+{ | |
+ alt = XMLParseUtil::stows(a); | |
+} | |
+ | |
+void | |
+Expander::setVariantValue(string const &v) | |
+{ | |
+ variant = XMLParseUtil::stows(v); | |
+} | |
Index: lttoolbox/expander.h | |
=================================================================== | |
--- lttoolbox/expander.h (revision 42693) | |
+++ lttoolbox/expander.h (working copy) | |
@@ -42,6 +42,16 @@ | |
xmlTextReaderPtr reader; | |
/** | |
+ * The alt value | |
+ */ | |
+ wstring alt; | |
+ | |
+ /** | |
+ * The variant value | |
+ */ | |
+ wstring variant; | |
+ | |
+ /** | |
* The paradigm being compiled | |
*/ | |
wstring current_paradigm; | |
@@ -186,6 +196,17 @@ | |
* Compile dictionary to letter transducers | |
*/ | |
void expand(string const &fichero, FILE *output); | |
+ /** | |
+ * Set the alt value to use in expansion (compared against the "alt" attribute of each entry) | |
+ * @param a the value | |
+ */ | |
+ void setAltValue(string const &a); | |
+ | |
+ /** | |
+ * Set the variant value to use in expansion (compared against the "v" attribute of each entry) | |
+ * @param v the value | |
+ */ | |
+ void setVariantValue(string const &v); | |
}; | |
Index: lttoolbox/dix.dtd | |
=================================================================== | |
--- lttoolbox/dix.dtd (revision 42693) | |
+++ lttoolbox/dix.dtd (working copy) | |
@@ -74,6 +74,8 @@ | |
i CDATA #IMPLIED | |
slr CDATA #IMPLIED | |
srl CDATA #IMPLIED | |
+ alt CDATA #IMPLIED | |
+ v CDATA #IMPLIED | |
> | |
<!-- r: restriction LR: left-to-right, | |
RL: right-to-left --> | |
@@ -83,6 +85,8 @@ | |
<!-- i: ignore ('yes') means ignore, otherwise it is not ignored) --> | |
<!-- slr: translation sense when translating from left to right --> | |
<!-- srl: translation sense when translating from right to left --> | |
+ <!-- alt: alternative entries are omitted if not selected --> | |
+ <!-- v: variant sets direction restrictions based on language variant --> | |
<!ELEMENT par EMPTY> | |
<!-- reference to paradigm --> | |
<!ATTLIST par | |
Index: lttoolbox/lt_expand.cc | |
=================================================================== | |
--- lttoolbox/lt_expand.cc (revision 42693) | |
+++ lttoolbox/lt_expand.cc (working copy) | |
@@ -24,6 +24,7 @@ | |
#include <iostream> | |
#include <libgen.h> | |
#include <string> | |
+#include <getopt.h> | |
#ifdef _MSC_VER | |
#include <io.h> | |
@@ -45,14 +46,57 @@ | |
int main(int argc, char *argv[]) | |
{ | |
FILE *input = NULL, *output = NULL; | |
+ Expander e; | |
- switch(argc) | |
+#if HAVE_GETOPT_LONG | |
+ int option_index=0; | |
+#endif | |
+ | |
+ while (true) { | |
+#if HAVE_GETOPT_LONG | |
+ static struct option long_options[] = | |
+ { | |
+ {"alt", required_argument, 0, 'a'}, | |
+ {"var", required_argument, 0, 'v'}, | |
+ {"help", no_argument, 0, 'h'}, | |
+ {0, 0, 0, 0} | |
+ }; | |
+ | |
+ int cnt=getopt_long(argc, argv, "a:v:h", long_options, &option_index); | |
+#else | |
+ int cnt=getopt(argc, argv, "a:v:h"); | |
+#endif | |
+ if (cnt==-1) | |
+ break; | |
+ | |
+ switch (cnt) | |
+ { | |
+ case 'a': | |
+ e.setAltValue(optarg); | |
+ break; | |
+ | |
+ case 'v': | |
+ e.setVariantValue(optarg); | |
+ break; | |
+ | |
+ case 'h': | |
+ default: | |
+ endProgram(argv[0]); | |
+ break; | |
+ } | |
+ } | |
+ | |
+ string infile; | |
+ string outfile; | |
+ | |
+ switch(argc - optind + 1) | |
{ | |
case 2: | |
- input = fopen(argv[1], "rb"); | |
+ infile = argv[argc-1]; | |
+ input = fopen(infile.c_str(), "rb"); | |
if(input == NULL) | |
{ | |
- cerr << "Error: Cannot open file '" << argv[1] << "'." << endl; | |
+ cerr << "Error: Cannot open file '" << infile << "'." << endl; | |
exit(EXIT_FAILURE); | |
} | |
fclose(input); | |
@@ -60,18 +104,20 @@ | |
break; | |
case 3: | |
- input = fopen(argv[1], "rb"); | |
+ infile = argv[argc-2]; | |
+ input = fopen(infile.c_str(), "rb"); | |
if(input == NULL) | |
{ | |
- cerr << "Error: Cannot open file '" << argv[1] << "'." << endl; | |
+ cerr << "Error: Cannot open file '" << infile << "'." << endl; | |
exit(EXIT_FAILURE); | |
} | |
fclose(input); | |
- output = fopen(argv[2], "wb"); | |
+ outfile = argv[argc-1]; // last positional argument is the output file | |
+ output = fopen(outfile.c_str(), "wb"); | |
if(output == NULL) | |
{ | |
- cerr << "Error: Cannot open file '" << argv[2] << "'." << endl; | |
+ cerr << "Error: Cannot open file '" << outfile << "'." << endl; | |
exit(EXIT_FAILURE); | |
} | |
break; | |
@@ -85,8 +131,7 @@ | |
_setmode(_fileno(output), _O_U8TEXT); | |
#endif | |
- Expander e; | |
- e.expand(argv[1], output); | |
+ e.expand(infile, output); | |
fclose(output); | |
return EXIT_SUCCESS; | |
Index: lttoolbox/compiler.h | |
=================================================================== | |
--- lttoolbox/compiler.h (revision 42693) | |
+++ lttoolbox/compiler.h (working copy) | |
@@ -44,6 +44,16 @@ | |
xmlTextReaderPtr reader; | |
/** | |
+ * The alt value | |
+ */ | |
+ wstring alt; | |
+ | |
+ /** | |
+ * The variant value | |
+ */ | |
+ wstring variant; | |
+ | |
+ /** | |
* The paradigm being compiled | |
*/ | |
wstring current_paradigm; | |
@@ -264,6 +274,8 @@ | |
static wstring const COMPILER_LEMMA_ATTR; | |
static wstring const COMPILER_IGNORE_ATTR; | |
static wstring const COMPILER_IGNORE_YES_VAL; | |
+ static wstring const COMPILER_ALT_ATTR; | |
+ static wstring const COMPILER_V_ATTR; | |
/** | |
@@ -292,6 +304,18 @@ | |
* @param fd the stream where write the result | |
*/ | |
void write(FILE *fd); | |
+ | |
+ /** | |
+ * Set the alt value to use in compilation | |
+ * @param a the value | |
+ */ | |
+ void setAltValue(string const &a); | |
+ | |
+ /** | |
+ * Set the variant value to use in compilation | |
+ * @param v the value | |
+ */ | |
+ void setVariantValue(string const &v); | |
}; | |
Index: lttoolbox/lt_comp.cc | |
=================================================================== | |
--- lttoolbox/lt_comp.cc (revision 42693) | |
+++ lttoolbox/lt_comp.cc (working copy) | |
@@ -23,6 +23,7 @@ | |
#include <iostream> | |
#include <libgen.h> | |
#include <string> | |
+#include <getopt.h> | |
using namespace std; | |
@@ -31,7 +32,9 @@ | |
if(name != NULL) | |
{ | |
cout << basename(name) << " v" << PACKAGE_VERSION <<": build a letter transducer from a dictionary" << endl; | |
- cout << "USAGE: " << basename(name) << " lr | rl dictionary_file output_file [acx_file]" << endl; | |
+ cout << "USAGE: " << basename(name) << " [-avh] lr | rl dictionary_file output_file [acx_file]" << endl; | |
+ cout << " -v: set language variant" << endl; | |
+ cout << " -a: set alternative" << endl; | |
cout << "Modes:" << endl; | |
cout << " lr: left-to-right compilation" << endl; | |
cout << " rl: right-to-left compilation" << endl; | |
@@ -42,37 +45,92 @@ | |
int main(int argc, char *argv[]) | |
{ | |
- if(argc != 4 && argc != 5) | |
+ Compiler c; | |
+ | |
+#if HAVE_GETOPT_LONG | |
+ int option_index=0; | |
+#endif | |
+ | |
+ while (true) { | |
+#if HAVE_GETOPT_LONG | |
+ static struct option long_options[] = | |
+ { | |
+ {"alt", required_argument, 0, 'a'}, | |
+ {"var", required_argument, 0, 'v'}, | |
+ {"help", no_argument, 0, 'h'}, | |
+ {0, 0, 0, 0} | |
+ }; | |
+ | |
+ int cnt=getopt_long(argc, argv, "a:v:h", long_options, &option_index); | |
+#else | |
+ int cnt=getopt(argc, argv, "a:v:h"); | |
+#endif | |
+ if (cnt==-1) | |
+ break; | |
+ | |
+ switch (cnt) | |
+ { | |
+ case 'a': | |
+ c.setAltValue(optarg); | |
+ break; | |
+ | |
+ case 'v': | |
+ c.setVariantValue(optarg); | |
+ break; | |
+ | |
+ case 'h': | |
+ default: | |
+ endProgram(argv[0]); | |
+ break; | |
+ } | |
+ } | |
+ | |
+ string opc; | |
+ string infile; | |
+ string outfile; | |
+ string acxfile; | |
+ | |
+ switch(argc - optind + 1) | |
{ | |
- endProgram(argv[0]); | |
+ case 5: | |
+ opc = argv[argc-4]; | |
+ infile = argv[argc-3]; | |
+ outfile = argv[argc-2]; | |
+ acxfile = argv[argc-1]; | |
+ break; | |
+ | |
+ case 4: | |
+ opc = argv[argc-3]; | |
+ infile = argv[argc-2]; | |
+ outfile = argv[argc-1]; | |
+ break; | |
+ | |
+ default: | |
+ endProgram(argv[0]); | |
+ break; | |
} | |
- string opc = argv[1]; | |
- | |
- Compiler c; | |
- | |
- | |
if(opc == "lr") | |
{ | |
- if(argc == 5) | |
+ if(acxfile != "") | |
{ | |
- c.parseACX(argv[4], Compiler::COMPILER_RESTRICTION_LR_VAL); | |
+ c.parseACX(acxfile, Compiler::COMPILER_RESTRICTION_LR_VAL); | |
} | |
- c.parse(argv[2], Compiler::COMPILER_RESTRICTION_LR_VAL); | |
+ c.parse(infile, Compiler::COMPILER_RESTRICTION_LR_VAL); | |
} | |
else if(opc == "rl") | |
{ | |
- c.parse(argv[2], Compiler::COMPILER_RESTRICTION_RL_VAL); | |
+ c.parse(infile, Compiler::COMPILER_RESTRICTION_RL_VAL); | |
} | |
else | |
{ | |
endProgram(argv[0]); | |
} | |
- FILE *output = fopen(argv[3], "wb"); | |
+ FILE *output = fopen(outfile.c_str(), "wb"); | |
if(!output) | |
{ | |
- cerr << "Error: Cannot open file '" << argv[2] << "'." << endl; | |
+ cerr << "Error: Cannot open file '" << outfile << "'." << endl; | |
exit(EXIT_FAILURE); | |
} | |
c.write(output); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<dictionary> | |
<alphabet>abcdefghijklmnopqrstuvwxyz</alphabet> | |
<sdefs> | |
<sdef n="n"/> | |
</sdefs> | |
<section id="main" type="standard"> | |
<e><p><l>foo</l><r>foo<s n="n"/></r></p></e> | |
<e alt="skip"><p><l>bar</l><r>bar<s n="n"/></r></p></e> | |
<e alt="keep"><p><l>baz</l><r>baz<s n="n"/></r></p></e> | |
<e v="GB"><p><l>colour</l><r>colour<s n="n"/></r></p></e> | |
<e v="US"><p><l>color</l><r>colour<s n="n"/></r></p></e> | |
<e v="US" r="LR"><p><l>color</l><r>colour<s n="n"/></r></p></e> | |
<e v="US" r="RL"><p><l>color</l><r>colour<s n="n"/></r></p></e> | |
</section> | |
</dictionary> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ ./lttoolbox/lt-expand -a keep -v US sample.xml | |
foo:foo<n> | |
baz:baz<n> | |
colour:>:colour<n> | |
color:colour<n> | |
color:>:colour<n> | |
color:<:colour<n> | |
$ ./lttoolbox/lt-expand -a keep -v GB sample.xml | |
foo:foo<n> | |
baz:baz<n> | |
colour:colour<n> | |
color:>:colour<n> | |
color:>:colour<n> | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment