Skip to content

Instantly share code, notes, and snippets.

@jimregan
Last active December 13, 2015 22:08
Show Gist options
  • Save jimregan/4982103 to your computer and use it in GitHub Desktop.
Save jimregan/4982103 to your computer and use it in GitHub Desktop.
Index: lttoolbox/lt-comp.1
===================================================================
--- lttoolbox/lt-comp.1 (revision 42693)
+++ lttoolbox/lt-comp.1 (working copy)
@@ -10,10 +10,26 @@
.SH SYNOPSIS
.B lt-comp
[
+.B \-a \fR|
+.B \-v \fR|
+.B \-h
+]
+[
.B lr \fR|
.B rl
] dictionary_file output_file
.PP
+.B lt-comp
+[
+.B \-\-alt \fR|
+.B \-\-var \fR|
+.B \-\-help
+]
+[
+.B lr \fR|
+.B rl
+] dictionary_file output_file
+.PP
.SH DESCRIPTION
.BR lt-comp
Is the application responsible of compiling dictionaries used by
@@ -23,6 +39,15 @@
.PP
.SH OPTIONS
.TP
+.B \-a, \-\-alt
+Sets the value of the \fIalt\fR attribute to use in compilation
+.TP
+.B \-v, \-\-var
+Sets the value of the \fIv\fR attribute to use in compilation
+.TP
+.B \-h, \-\-help
+Prints a short help message
+.TP
.B lr
The resulting transducer will process dictionary entries
\fIleft-to-right\fR.
Index: lttoolbox/lt-expand.1
===================================================================
--- lttoolbox/lt-expand.1 (revision 42693)
+++ lttoolbox/lt-expand.1 (working copy)
@@ -9,17 +9,41 @@
architecture: \fBhttp://www.apertium.org\fR.
.SH SYNOPSIS
.B lt-expand
+[
+.B \-a \fR|
+.B \-v \fR|
+.B \-h
+]
dictionary_file [output_file]
.PP
+.B lt-expand
+[
+.B \-\-alt \fR|
+.B \-\-var \fR|
+.B \-\-help
+]
+dictionary_file [output_file]
+.PP
.SH DESCRIPTION
.BR lt-expand
-Is the application responsible of expanding a dictionary into a
+Is the application responsible for expanding a dictionary into a
simple list of input string-output string pairs by eliminating
paradigms through substitution and unfolding.
.PP
The output goes to \fIoutput_file\fR if it is present or to standard
output if it is missing.
.PP
+.SH OPTIONS
+.TP
+.B \-a, \-\-alt
+Sets the value of the \fIalt\fR attribute to use in expansion
+.TP
+.B \-v, \-\-var
+Sets the value of the \fIv\fR attribute to use in expansion
+.TP
+.B \-h, \-\-help
+Prints a short help message
+.PP
.SH FILES
.B dictionary_file
The input dictionary to expand.
Index: lttoolbox/compiler.cc
===================================================================
--- lttoolbox/compiler.cc (revision 42693)
+++ lttoolbox/compiler.cc (working copy)
@@ -56,6 +56,8 @@
wstring const Compiler::COMPILER_LEMMA_ATTR = L"lm";
wstring const Compiler::COMPILER_IGNORE_ATTR = L"i";
wstring const Compiler::COMPILER_IGNORE_YES_VAL = L"yes";
+wstring const Compiler::COMPILER_ALT_ATTR = L"alt";
+wstring const Compiler::COMPILER_V_ATTR = L"v";
Compiler::Compiler()
{
@@ -639,9 +641,14 @@
{
wstring atributo=this->attrib(COMPILER_RESTRICTION_ATTR);
wstring ignore = this->attrib(COMPILER_IGNORE_ATTR);
+ wstring altval = this->attrib(COMPILER_ALT_ATTR);
+ wstring varval = this->attrib(COMPILER_V_ATTR);
// if entry is masked by a restriction of direction or an ignore mark
- if((atributo != L"" && atributo != direction) || ignore == COMPILER_IGNORE_YES_VAL)
+ if((atributo != L"" && atributo != direction)
+ || ignore == COMPILER_IGNORE_YES_VAL
+ || (altval != L"" && altval != alt) // entry has an alt that is not the selected one
+ || (direction == COMPILER_RESTRICTION_RL_VAL && varval != L"" && varval != variant)) // RL only: entry has a v that is not the selected variant
{
// parse to the end of the entry
wstring name = L"";
@@ -852,3 +859,15 @@
it->second.write(output);
}
}
+
+void
+Compiler::setAltValue(string const &a)
+{
+ alt = XMLParseUtil::stows(a);
+}
+
+void
+Compiler::setVariantValue(string const &v)
+{
+ variant = XMLParseUtil::stows(v);
+}
Index: lttoolbox/dix.rng
===================================================================
--- lttoolbox/dix.rng (revision 42693)
+++ lttoolbox/dix.rng (working copy)
@@ -159,6 +159,12 @@
<optional>
<attribute name="srl"/>
</optional>
+ <optional>
+ <attribute name="alt"/>
+ </optional>
+ <optional>
+ <attribute name="v"/>
+ </optional>
</define>
<!--
r: restriction LR: left-to-right,
@@ -170,6 +176,8 @@
<!-- i: ignore ('yes') means ignore, otherwise it is not ignored) -->
<!-- slr: translation sense when translating from left to right -->
<!-- srl: translation sense when translating from right to left -->
+ <!-- alt: alternative entries are omitted if not selected -->
+ <!-- v: variant sets direction restrictions based on language variant -->
<define name="par">
<element name="par">
<ref name="attlist.par"/>
Index: lttoolbox/expander.cc
===================================================================
--- lttoolbox/expander.cc (revision 42693)
+++ lttoolbox/expander.cc (working copy)
@@ -295,9 +295,13 @@
{
wstring atributo=this->attrib(Compiler::COMPILER_RESTRICTION_ATTR);
wstring entrname=this->attrib(Compiler::COMPILER_LEMMA_ATTR);
+ wstring altval = this->attrib(Compiler::COMPILER_ALT_ATTR);
+ wstring varval = this->attrib(Compiler::COMPILER_V_ATTR);
wstring myname = L"";
- if(this->attrib(Compiler::COMPILER_IGNORE_ATTR) == L"yes")
+ if(this->attrib(Compiler::COMPILER_IGNORE_ATTR) == L"yes"
+ || (altval != L"" && altval != alt) // entry has an alt that is not the selected one
+ || (varval != L"" && varval != variant && atributo == Compiler::COMPILER_RESTRICTION_RL_VAL)) // RL-restricted entry whose v is not the selected variant
{
do
{
@@ -316,7 +320,7 @@
}
EntList items, items_lr, items_rl;
- if(atributo == Compiler::COMPILER_RESTRICTION_LR_VAL)
+ if(atributo == Compiler::COMPILER_RESTRICTION_LR_VAL || (varval != L"" && varval != variant && atributo != Compiler::COMPILER_RESTRICTION_RL_VAL))
{
items_lr.push_back(pair<wstring, wstring>(L"", L""));
}
@@ -594,3 +598,15 @@
it->second.append(endings.second);
}
}
+
+void
+Expander::setAltValue(string const &a)
+{
+ alt = XMLParseUtil::stows(a);
+}
+
+void
+Expander::setVariantValue(string const &v)
+{
+ variant = XMLParseUtil::stows(v);
+}
Index: lttoolbox/expander.h
===================================================================
--- lttoolbox/expander.h (revision 42693)
+++ lttoolbox/expander.h (working copy)
@@ -42,6 +42,16 @@
xmlTextReaderPtr reader;
/**
+ * The alt value
+ */
+ wstring alt;
+
+ /**
+ * The variant value
+ */
+ wstring variant;
+
+ /**
* The paradigm being compiled
*/
wstring current_paradigm;
@@ -186,6 +196,17 @@
* Compile dictionary to letter transducers
*/
void expand(string const &fichero, FILE *output);
+ /**
+ * Set the alt value to use in expansion
+ * @param a the value
+ */
+ void setAltValue(string const &a);
+
+ /**
+ * Set the variant value to use in expansion
+ * @param v the value
+ */
+ void setVariantValue(string const &v);
};
Index: lttoolbox/dix.dtd
===================================================================
--- lttoolbox/dix.dtd (revision 42693)
+++ lttoolbox/dix.dtd (working copy)
@@ -74,6 +74,8 @@
i CDATA #IMPLIED
slr CDATA #IMPLIED
srl CDATA #IMPLIED
+ alt CDATA #IMPLIED
+ v CDATA #IMPLIED
>
<!-- r: restriction LR: left-to-right,
RL: right-to-left -->
@@ -83,6 +85,8 @@
<!-- i: ignore ('yes') means ignore, otherwise it is not ignored) -->
<!-- slr: translation sense when translating from left to right -->
<!-- srl: translation sense when translating from right to left -->
+ <!-- alt: alternative entries are omitted if not selected -->
+ <!-- v: variant sets direction restrictions based on language variant -->
<!ELEMENT par EMPTY>
<!-- reference to paradigm -->
<!ATTLIST par
Index: lttoolbox/lt_expand.cc
===================================================================
--- lttoolbox/lt_expand.cc (revision 42693)
+++ lttoolbox/lt_expand.cc (working copy)
@@ -24,6 +24,7 @@
#include <iostream>
#include <libgen.h>
#include <string>
+#include <getopt.h>
#ifdef _MSC_VER
#include <io.h>
@@ -45,14 +46,57 @@
int main(int argc, char *argv[])
{
FILE *input = NULL, *output = NULL;
+ Expander e;
- switch(argc)
+#if HAVE_GETOPT_LONG
+ int option_index=0;
+#endif
+
+ while (true) {
+#if HAVE_GETOPT_LONG
+ static struct option long_options[] =
+ {
+ {"alt", required_argument, 0, 'a'},
+ {"var", required_argument, 0, 'v'},
+ {"help", no_argument, 0, 'h'},
+ {0, 0, 0, 0}
+ };
+
+ int cnt=getopt_long(argc, argv, "a:v:h", long_options, &option_index);
+#else
+ int cnt=getopt(argc, argv, "a:v:h");
+#endif
+ if (cnt==-1)
+ break;
+
+ switch (cnt)
+ {
+ case 'a':
+ e.setAltValue(optarg);
+ break;
+
+ case 'v':
+ e.setVariantValue(optarg);
+ break;
+
+ case 'h':
+ default:
+ endProgram(argv[0]);
+ break;
+ }
+ }
+
+ string infile;
+ string outfile;
+
+ switch(argc - optind + 1)
{
case 2:
- input = fopen(argv[1], "rb");
+ infile = argv[argc-1];
+ input = fopen(infile.c_str(), "rb");
if(input == NULL)
{
- cerr << "Error: Cannot open file '" << argv[1] << "'." << endl;
+ cerr << "Error: Cannot open file '" << infile << "'." << endl;
exit(EXIT_FAILURE);
}
fclose(input);
@@ -60,18 +104,20 @@
break;
case 3:
- input = fopen(argv[1], "rb");
+ infile = argv[argc-2];
+ input = fopen(infile.c_str(), "rb");
if(input == NULL)
{
- cerr << "Error: Cannot open file '" << argv[1] << "'." << endl;
+ cerr << "Error: Cannot open file '" << infile << "'." << endl;
exit(EXIT_FAILURE);
}
fclose(input);
- output = fopen(argv[2], "wb");
+ outfile = argv[argc-1];
+ output = fopen(argv[argc-1], "wb");
if(output == NULL)
{
- cerr << "Error: Cannot open file '" << argv[2] << "'." << endl;
+ cerr << "Error: Cannot open file '" << outfile << "'." << endl;
exit(EXIT_FAILURE);
}
break;
@@ -85,8 +131,7 @@
_setmode(_fileno(output), _O_U8TEXT);
#endif
- Expander e;
- e.expand(argv[1], output);
+ e.expand(infile, output);
fclose(output);
return EXIT_SUCCESS;
Index: lttoolbox/compiler.h
===================================================================
--- lttoolbox/compiler.h (revision 42693)
+++ lttoolbox/compiler.h (working copy)
@@ -44,6 +44,16 @@
xmlTextReaderPtr reader;
/**
+ * The alt value
+ */
+ wstring alt;
+
+ /**
+ * The variant value
+ */
+ wstring variant;
+
+ /**
* The paradigm being compiled
*/
wstring current_paradigm;
@@ -264,6 +274,8 @@
static wstring const COMPILER_LEMMA_ATTR;
static wstring const COMPILER_IGNORE_ATTR;
static wstring const COMPILER_IGNORE_YES_VAL;
+ static wstring const COMPILER_ALT_ATTR;
+ static wstring const COMPILER_V_ATTR;
/**
@@ -292,6 +304,18 @@
* @param fd the stream where write the result
*/
void write(FILE *fd);
+
+ /**
+ * Set the alt value to use in compilation
+ * @param a the value
+ */
+ void setAltValue(string const &a);
+
+ /**
+ * Set the variant value to use in compilation
+ * @param v the value
+ */
+ void setVariantValue(string const &v);
};
Index: lttoolbox/lt_comp.cc
===================================================================
--- lttoolbox/lt_comp.cc (revision 42693)
+++ lttoolbox/lt_comp.cc (working copy)
@@ -23,6 +23,7 @@
#include <iostream>
#include <libgen.h>
#include <string>
+#include <getopt.h>
using namespace std;
@@ -31,7 +32,9 @@
if(name != NULL)
{
cout << basename(name) << " v" << PACKAGE_VERSION <<": build a letter transducer from a dictionary" << endl;
- cout << "USAGE: " << basename(name) << " lr | rl dictionary_file output_file [acx_file]" << endl;
+ cout << "USAGE: " << basename(name) << " [-a alt] [-v var] [-h] lr | rl dictionary_file output_file [acx_file]" << endl;
+ cout << " -v: set language variant" << endl;
+ cout << " -a: set alternative" << endl;
cout << "Modes:" << endl;
cout << " lr: left-to-right compilation" << endl;
cout << " rl: right-to-left compilation" << endl;
@@ -42,37 +45,92 @@
int main(int argc, char *argv[])
{
- if(argc != 4 && argc != 5)
+ Compiler c;
+
+#if HAVE_GETOPT_LONG
+ int option_index=0;
+#endif
+
+ while (true) {
+#if HAVE_GETOPT_LONG
+ static struct option long_options[] =
+ {
+ {"alt", required_argument, 0, 'a'},
+ {"var", required_argument, 0, 'v'},
+ {"help", no_argument, 0, 'h'},
+ {0, 0, 0, 0}
+ };
+
+ int cnt=getopt_long(argc, argv, "a:v:h", long_options, &option_index);
+#else
+ int cnt=getopt(argc, argv, "a:v:h");
+#endif
+ if (cnt==-1)
+ break;
+
+ switch (cnt)
+ {
+ case 'a':
+ c.setAltValue(optarg);
+ break;
+
+ case 'v':
+ c.setVariantValue(optarg);
+ break;
+
+ case 'h':
+ default:
+ endProgram(argv[0]);
+ break;
+ }
+ }
+
+ string opc;
+ string infile;
+ string outfile;
+ string acxfile;
+
+ switch(argc - optind + 1)
{
- endProgram(argv[0]);
+ case 5:
+ opc = argv[argc-4];
+ infile = argv[argc-3];
+ outfile = argv[argc-2];
+ acxfile = argv[argc-1];
+ break;
+
+ case 4:
+ opc = argv[argc-3];
+ infile = argv[argc-2];
+ outfile = argv[argc-1];
+ break;
+
+ default:
+ endProgram(argv[0]);
+ break;
}
- string opc = argv[1];
-
- Compiler c;
-
-
if(opc == "lr")
{
- if(argc == 5)
+ if(acxfile != "")
{
- c.parseACX(argv[4], Compiler::COMPILER_RESTRICTION_LR_VAL);
+ c.parseACX(acxfile, Compiler::COMPILER_RESTRICTION_LR_VAL);
}
- c.parse(argv[2], Compiler::COMPILER_RESTRICTION_LR_VAL);
+ c.parse(infile, Compiler::COMPILER_RESTRICTION_LR_VAL);
}
else if(opc == "rl")
{
- c.parse(argv[2], Compiler::COMPILER_RESTRICTION_RL_VAL);
+ c.parse(infile, Compiler::COMPILER_RESTRICTION_RL_VAL);
}
else
{
endProgram(argv[0]);
}
- FILE *output = fopen(argv[3], "wb");
+ FILE *output = fopen(outfile.c_str(), "wb");
if(!output)
{
- cerr << "Error: Cannot open file '" << argv[2] << "'." << endl;
+ cerr << "Error: Cannot open file '" << outfile << "'." << endl;
exit(EXIT_FAILURE);
}
c.write(output);
<dictionary>
<alphabet>abcdefghijklmnopqrstuvwxyz</alphabet>
<sdefs>
<sdef n="n"/>
</sdefs>
<section id="main" type="standard">
<e><p><l>foo</l><r>foo<s n="n"/></r></p></e>
<e alt="skip"><p><l>bar</l><r>bar<s n="n"/></r></p></e>
<e alt="keep"><p><l>baz</l><r>baz<s n="n"/></r></p></e>
<e v="GB"><p><l>colour</l><r>colour<s n="n"/></r></p></e>
<e v="US"><p><l>color</l><r>colour<s n="n"/></r></p></e>
<e v="US" r="LR"><p><l>color</l><r>colour<s n="n"/></r></p></e>
<e v="US" r="RL"><p><l>color</l><r>colour<s n="n"/></r></p></e>
</section>
</dictionary>
$ ./lttoolbox/lt-expand -a keep -v US sample.xml
foo:foo<n>
baz:baz<n>
colour:>:colour<n>
color:colour<n>
color:>:colour<n>
color:<:colour<n>
$ ./lttoolbox/lt-expand -a keep -v GB sample.xml
foo:foo<n>
baz:baz<n>
colour:colour<n>
color:>:colour<n>
color:>:colour<n>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment