This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Generated by the protocol buffer compiler. DO NOT EDIT! | |
# source: document.proto | |
import sys | |
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) | |
from google.protobuf import descriptor as _descriptor | |
from google.protobuf import message as _message | |
from google.protobuf import reflection as _reflection | |
from google.protobuf import symbol_database as _symbol_database | |
from google.protobuf import descriptor_pb2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note that this is not a standalone script. It has dependencies. | |
# Here it is just used as an example illustrating the scoring | |
# process for miRTex results. | |
from __future__ import unicode_literals, print_function | |
import pickle | |
import os | |
import codecs | |
import sys | |
import re |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function, unicode_literals | |
import json | |
import urllib | |
import urllib2 | |
# The API URL. | |
api_url = 'http://research.bioinformatics.udel.edu/miRTex/ner' | |
# The documents to be processed. | |
documents = { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' Initialization: install NLTK python module and download data. | |
$ pip install nltk | |
$ echo 'import nltk; nltk.download("punkt")' | python | |
''' | |
from __future__ import print_function, unicode_literals | |
import nltk.data | |
_sent_detector = nltk.data.load('tokenizers/punkt/english.pickle') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import unicode_literals | |
import sys | |
import codecs | |
import json | |
from lxml import etree | |
# See http://lxml.de/api.html#incremental-xml-generation | |
# for incremental XML generation used below. | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# Test various characteristics of Unicode string in Python 2. | |
# In Python 2, we have 2 types to store string data, str and unicode. | |
# The type str is like a byte string, while the type unicode stores | |
# unicode codepoints, with each being represented by one or more bytes. | |
# Define a simple ASCII string, the type is str. | |
ascii_a = 'abcdefg' | |
print 'OUTPUT 1' | |
print type(ascii_a) |