Skip to content

Instantly share code, notes, and snippets.

@Sparkboxx
Created August 12, 2013 09:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Sparkboxx/6209422 to your computer and use it in GitHub Desktop.
Save Sparkboxx/6209422 to your computer and use it in GitHub Desktop.

Using the OpeNER webservices with curl

You can call the OpeNER webservices with curl. All components except the language identifier take KAF (XML) as input and produce KAF as output. Only the language identifier takes "raw text" as input.

To send a text to the language identifier, use curl's -F (form) option. Prefixing the file name with "<" makes curl read the value of the form field from that file, like this:

echo "The hotel was very nice. The location is a bit far from the center though." > review.txt
curl -F "input=<review.txt" -F "kaf=true" http://opener.olery.com/language-identifier -XPOST > language.kaf

The kaf=true option specifies that you want the language identifier to output KAF, so that you can feed its output into the next component of the chain.

Now that we have a language.kaf file containing the language-identifier output, we can feed it into the tokenizer like this:

curl -F "input=<language.kaf" -F "kaf=true" http://opener.olery.com/tokenizer -XPOST > tokenizer.kaf

Here we also need the kaf=true option, to tell the tokenizer that its input is KAF. (It takes the language identification code from there.)

Then, rinse and repeat for the other steps:

echo "The hotel was very nice. The location is a bit far from the center though." > review.txt
curl -F "input=<review.txt" -F "kaf=true" http://opener.olery.com/language-identifier -XPOST > language.kaf
curl -F "input=<language.kaf" -F "kaf=true" http://opener.olery.com/tokenizer -XPOST > tokenizer.kaf
curl -F "input=<tokenizer.kaf" http://opener.olery.com/pos-tagger -XPOST > pos-tagger.kaf
curl -F "input=<pos-tagger.kaf" http://opener.olery.com/polarity-tagger -XPOST > polarity-tagger.kaf
curl -F "input=<polarity-tagger.kaf" http://opener.olery.com/opinion-detector -XPOST

This will give the following result:

<?xml version='1.0' encoding='UTF-8'?>
<KAF xml:lang="en" version="v1.opener">
  <kafHeader>
    <linguisticProcessors layer="text">
      <lp name="opennlp-en-tok" timestamp="2013-08-12T09:19:52Z" version="1.0"/>
      <lp name="opennlp-en-sent" timestamp="2013-08-12T09:19:53Z" version="1.0"/>
    </linguisticProcessors>
    <linguisticProcessors layer="terms">
      <lp name="ehu-pos-en" timestamp="2013-08-12 9:19:54" version="1.0"/>
      <lp timestamp="2013-08-12T09:19:55UTC" version="1.0" name="Basic_polarity_tagger_with_pos"/>
    </linguisticProcessors>
    <linguisticProcessor layer="opinions">
      <lp timestamp="2013-08-12T09:20:01UTC" version="1.0" name="Crfsuite machine learning opinion miner"/>
    </linguisticProcessor>
  </kafHeader>
  <text>
    <wf wid="w1" sent="1" para="1" offset="0" length="3">The</wf>
    <wf wid="w2" sent="1" para="1" offset="4" length="5">hotel</wf>
    <wf wid="w3" sent="1" para="1" offset="10" length="3">was</wf>
    <wf wid="w4" sent="1" para="1" offset="14" length="4">very</wf>
    <wf wid="w5" sent="1" para="1" offset="19" length="4">nice</wf>
    <wf wid="w6" sent="1" para="1" offset="23" length="1">.</wf>
    <wf wid="w7" sent="2" para="1" offset="25" length="3">The</wf>
    <wf wid="w8" sent="2" para="1" offset="29" length="8">location</wf>
    <wf wid="w9" sent="2" para="1" offset="38" length="2">is</wf>
    <wf wid="w10" sent="2" para="1" offset="41" length="1">a</wf>
    <wf wid="w11" sent="2" para="1" offset="43" length="3">bit</wf>
    <wf wid="w12" sent="2" para="1" offset="47" length="3">far</wf>
    <wf wid="w13" sent="2" para="1" offset="51" length="4">from</wf>
    <wf wid="w14" sent="2" para="1" offset="56" length="3">the</wf>
    <wf wid="w15" sent="2" para="1" offset="60" length="6">center</wf>
    <wf wid="w16" sent="2" para="1" offset="67" length="6">though</wf>
    <wf wid="w17" sent="2" para="1" offset="73" length="1">.</wf>
  </text>
  <terms>
    <!--The-->
    <term tid="t1" type="close" lemma="the" pos="D" morphofeat="DT">
      <span>
        <target id="w1"/>
      </span>
    </term>
    <!--hotel-->
    <term tid="t2" type="open" lemma="hotel" pos="N" morphofeat="NN">
      <span>
        <target id="w2"/>
      </span>
    </term>
    <!--was-->
    <term tid="t3" type="open" lemma="be" pos="V" morphofeat="VBD">
      <span>
        <target id="w3"/>
      </span>
    </term>
    <!--very-->
    <term tid="t4" type="open" lemma="very" pos="A" morphofeat="RB">
      <span>
        <target id="w4"/>
      </span>
      <sentiment resource="VUA_olery_lexicon_en_lmf" sentiment_modifier="intensifier"/>
    </term>
    <!--nice-->
    <term tid="t5" type="open" lemma="nice" pos="G" morphofeat="JJ">
      <span>
        <target id="w5"/>
      </span>
      <sentiment polarity="positive" resource="VUA_olery_lexicon_en_lmf"/>
    </term>
    <!--.-->
    <term tid="t6" type="close" lemma="." pos="O" morphofeat=".">
      <span>
        <target id="w6"/>
      </span>
    </term>
    <!--The-->
    <term tid="t7" type="close" lemma="the" pos="D" morphofeat="DT">
      <span>
        <target id="w7"/>
      </span>
    </term>
    <!--location-->
    <term tid="t8" type="open" lemma="location" pos="N" morphofeat="NN">
      <span>
        <target id="w8"/>
      </span>
    </term>
    <!--is-->
    <term tid="t9" type="open" lemma="be" pos="V" morphofeat="VBZ">
      <span>
        <target id="w9"/>
      </span>
    </term>
    <!--a-->
    <term tid="t10" type="close" lemma="a" pos="D" morphofeat="DT">
      <span>
        <target id="w10"/>
      </span>
    </term>
    <!--bit-->
    <term tid="t11" type="open" lemma="bit" pos="N" morphofeat="NN">
      <span>
        <target id="w11"/>
      </span>
    </term>
    <!--far-->
    <term tid="t12" type="open" lemma="far" pos="A" morphofeat="RB">
      <span>
        <target id="w12"/>
      </span>
      <sentiment polarity="negative" resource="VUA_olery_lexicon_en_lmf"/>
    </term>
    <!--from-->
    <term tid="t13" type="close" lemma="from" pos="P" morphofeat="IN">
      <span>
        <target id="w13"/>
      </span>
    </term>
    <!--the-->
    <term tid="t14" type="close" lemma="the" pos="D" morphofeat="DT">
      <span>
        <target id="w14"/>
      </span>
    </term>
    <!--center-->
    <term tid="t15" type="open" lemma="center" pos="N" morphofeat="NN">
      <span>
        <target id="w15"/>
      </span>
    </term>
    <!--though-->
    <term tid="t16" type="open" lemma="though" pos="A" morphofeat="RB">
      <span>
        <target id="w16"/>
      </span>
    </term>
    <!--.-->
    <term tid="t17" type="close" lemma="." pos="O" morphofeat=".">
      <span>
        <target id="w17"/>
      </span>
    </term>
  </terms>
  <features/>
  <opinions>
    <opinion oid="o_1">
      <opinion_holder>
        <span/>
      </opinion_holder>
      <opinion_target>
        <span/>
      </opinion_target>
      <opinion_expression polarity="positive" strength="1">
        <span>
          <target id="t4"/>
          <target id="t5"/>
        </span>
      </opinion_expression>
    </opinion>
    <opinion oid="o_2">
      <opinion_holder>
        <span/>
      </opinion_holder>
      <opinion_target>
        <span/>
      </opinion_target>
      <opinion_expression polarity="negative" strength="1">
        <span>
          <target id="t10"/>
          <target id="t11"/>
          <target id="t12"/>
          <target id="t13"/>
          <target id="t14"/>
          <target id="t15"/>
          <target id="t16"/>
        </span>
      </opinion_expression>
    </opinion>
  </opinions>
</KAF>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment