Last active
August 29, 2015 14:03
-
-
Save dimazest/4815236d92bd93465951 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Usage example for the patched MaltParser; assumes ``nltk.parse.malt`` has
# already been imported in the session.
#
# The ``bin`` keyword is the one added to ``MaltParser.__init__`` by the patch
# in this gist: it points directly at the MaltParser jar so NLTK does not have
# to search for ``malt.jar`` itself.
p = nltk.parse.malt.MaltParser(
    bin='/Users/dimazest/qmul/tools/parts/malt-1.7.2/dist/maltparser-1.7.2/maltparser-1.7.2.jar',
    # Name of the pre-trained model; it is looked up as
    # ${working_dir}/${mco}.mco.
    mco='engmalt.linear-1.7',
    working_dir='/Users/dimazest/qmul/tools/sw/',
    additional_java_args=['-Xmx512m'],
)

# Parse a pre-tokenized sentence and convert the result to a tree.
# (The original snippet left this call's parenthesis unclosed.)
p.parse('a man runs'.split()).tree()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/nltk/parse/api.py b/nltk/parse/api.py | |
index 9ec29c8..b809767 100644 | |
--- a/nltk/parse/api.py | |
+++ b/nltk/parse/api.py | |
@@ -7,10 +7,9 @@ | |
# For license information, see LICENSE.TXT | |
# | |
-import itertools | |
- | |
from nltk.internals import overridden | |
+ | |
class ParserI(object): | |
""" | |
A processing class for deriving trees that represent possible | |
@@ -42,7 +41,7 @@ class ParserI(object): | |
:rtype: iter(Tree) | |
""" | |
if overridden(self.parse_sents): | |
- return next(self.parse_sents([sent])) | |
+ return next(iter(self.parse_sents([sent]))) | |
elif overridden(self.parse_one): | |
return (tree for tree in [self.parse_one(sent)] if tree is not None) | |
elif overridden(self.parse_all): | |
@@ -63,4 +62,4 @@ class ParserI(object): | |
def parse_one(self, sent): | |
""":rtype: Tree or None""" | |
- return next(self.parse(sent), None) | |
+ return next(iter(self.parse(sent)), None) | |
diff --git a/nltk/parse/malt.py b/nltk/parse/malt.py | |
index 59d547b..0717c6d 100644 | |
--- a/nltk/parse/malt.py | |
+++ b/nltk/parse/malt.py | |
@@ -22,9 +22,10 @@ from nltk.internals import find_binary | |
from nltk.parse.api import ParserI | |
from nltk.parse.dependencygraph import DependencyGraph | |
+ | |
class MaltParser(ParserI): | |
- def __init__(self, tagger=None, mco=None, working_dir=None, additional_java_args=None): | |
+ def __init__(self, tagger=None, mco=None, working_dir=None, additional_java_args=None, bin=None): | |
""" | |
An interface for parsing with the Malt Parser. | |
@@ -32,11 +33,17 @@ class MaltParser(ParserI): | |
will not be required, and MaltParser will use the model file in | |
${working_dir}/${mco}.mco. | |
:type mco: str | |
+ | |
+ :param bin: The full path to the ``malt`` binary. If not | |
+ specified, then nltk will search the system for a ``malt.jar`` | |
+ binary; and if one is not found, it will raise a | |
+ ``LookupError`` exception. | |
+ :type bin: str | |
+ | |
""" | |
- self.config_malt() | |
+ self.config_malt(bin=bin) | |
self.mco = 'malt_temp' if mco is None else mco | |
- self.working_dir = tempfile.gettempdir() if working_dir is None\ | |
- else working_dir | |
+ self.working_dir = tempfile.gettempdir() if working_dir is None else working_dir | |
self.additional_java_args = [] if additional_java_args is None else additional_java_args | |
self._trained = mco is not None | |
@@ -69,23 +76,29 @@ class MaltParser(ParserI): | |
#: A list of directories that should be searched for the malt | |
#: executables. This list is used by ``config_malt`` when searching | |
#: for the malt executables. | |
- _malt_path = ['.', | |
- '/usr/lib/malt-1*', | |
- '/usr/share/malt-1*', | |
- '/usr/local/bin', | |
- '/usr/local/malt-1*', | |
- '/usr/local/bin/malt-1*', | |
- '/usr/local/malt-1*', | |
- '/usr/local/share/malt-1*'] | |
+ _malt_path = [ | |
+ '.', | |
+ '/usr/lib/malt-1*', | |
+ '/usr/share/malt-1*', | |
+ '/usr/local/bin', | |
+ '/usr/local/malt-1*', | |
+ '/usr/local/bin/malt-1*', | |
+ '/usr/local/malt-1*', | |
+ '/usr/local/share/malt-1*', | |
+ ] | |
# Expand wildcards in _malt_path: | |
malt_path = reduce(add, map(glob.glob, _malt_path)) | |
# Find the malt binary. | |
- self._malt_bin = find_binary('malt.jar', bin, | |
- searchpath=malt_path, env_vars=['MALTPARSERHOME'], | |
+ self._malt_bin = find_binary( | |
+ 'malt.jar', | |
+ bin, | |
+ searchpath=malt_path, | |
+ env_vars=['MALTPARSERHOME'], | |
url='http://www.maltparser.org/', | |
- verbose=verbose) | |
+ verbose=verbose, | |
+ ) | |
def parse_all(self, sentence, verbose=False): | |
""" | |
@@ -159,8 +172,8 @@ class MaltParser(ParserI): | |
dir=self.working_dir, | |
delete=False) | |
output_file = tempfile.NamedTemporaryFile(prefix='malt_output.conll', | |
- dir=self.working_dir, | |
- delete=False) | |
+ dir=self.working_dir, | |
+ delete=False) | |
try: | |
for sentence in sentences: | |
@@ -262,7 +275,7 @@ def demo(): | |
verbose = False | |
maltParser = MaltParser() | |
- maltParser.train([dg1,dg2], verbose=verbose) | |
+ maltParser.train([dg1, dg2], verbose=verbose) | |
print(maltParser.raw_parse('John sees Mary', verbose=verbose).tree().pprint()) | |
print(maltParser.raw_parse('a man runs', verbose=verbose).tree().pprint()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment