from nltk.tokenize import word_tokenize
from nltk import pos_tag
from sussex_nltk.parse import dep_parse_sentences_arceager
sentences = ["This is the first example sentence",
"This is the second example sentence",
"This is the third example sentence"]
parsed_sents = dep_parse_sentences_arceager(pos_tag(word_tokenize(sentence)) for sentence in sentences)
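# To inspect the parser output, you could print each parsed sentence (a sketch,
# assuming dep_parse_sentences_arceager returns the same kind of ParsedSentence
# objects as the loaders used in the snippets below):
for parsed_sentence in parsed_sents:
    print "--- Sentence ---"
    print parsed_sentence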
from sussex_nltk.parse import load_parsed_example_sentences
parsed_example_sentences = load_parsed_example_sentences()
# To inspect the sentences, you could print them straight out
for parsed_sentence in parsed_example_sentences:
    print "--- Sentence ---"
    print parsed_sentence
Input:
    query_token = the token we're interested in; it is part of a dependency tree of tokens.
Output:
    opinions = a list of token forms (which constitute the extracted opinions related to the query token)

opinions = []
for each dependant token of query_token:
    append the dependant's form to opinions if the dependency relation of the dependant is "det"
def opinion_extractor(aspect_token, parsed_sentence):
    # Your function will have 3 steps:

    # i. Initialise a list of opinions
    opinions = []

    # ii. Find opinions (as an example we get all the dependants of the aspect token that have the relation "det")
    opinions += [dependant.form for dependant in parsed_sentence.get_dependants(aspect_token) if dependant.deprel == "det"]
    # You can continue to add to "opinions". Remember you can get the head of a token, and filter by PoS tag or Deprel too!

    # iii. Return the list of opinions
    return opinions
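# A quick way to try the function out (a sketch, not part of the exercise itself):
# load the pre-parsed DVD review sentences for an aspect, find the aspect tokens
# in each sentence, and print whatever the function extracts for them.
from sussex_nltk.parse import load_parsed_dvd_sentences

for parsed_sentence in load_parsed_dvd_sentences("plot"):
    for aspect_token in parsed_sentence.get_query_tokens("plot"):
        print opinion_extractor(aspect_token, parsed_sentence)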
from sussex_nltk.parse import load_parsed_dvd_sentences, load_parsed_example_sentences
aspect = "plot" # Set this to the aspect token you're interested in
save_file_path = r"/path/to/savefile.txt" # Set this to the location of the file you wish to create/overwrite with the saved output.
# Tracking these numbers will allow us to see what proportion of sentences we discovered features in
sentences_with_discovered_features = 0 # Number of sentences we discovered features in
total_sentences = 0 # Total number of sentences
# This is a "with statement", it invokes a context manager, which handles the opening and closing of resources (like files)
# Say for example we acquire a list of BasicToken objects by getting all the dependants of a token:
dependants = parsed_sentence.get_dependants(aspect_token)
# We could filter that list, keeping only those tokens whose dependency relations with the aspect token are "dobj", by doing the following:
dependants = [token for token in dependants if token.deprel == "dobj"]
# Or we could filter that list, keeping only those tokens whose PoS tags are "RB" (for adverb)
dependants = [token for token in dependants if token.pos == "RB"]
# Or we could filter that list, keeping only those tokens whose form is NOT "main" or "special"
dependants = [token for token in dependants if token.form not in ("main", "special")]
# Given a ParsedSentence object, and an aspect token acquired from it (as in the previous section)
# Get the head of the aspect token
head_token = parsed_sentence.get_head(aspect_token)
print head_token
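# The head is often useful when it is itself an opinion word. For instance (an
# illustrative heuristic, not part of the exercise), you might keep it only when
# its PoS tag marks it as an adjective:
if head_token.pos.startswith("JJ"):
    print "Possible opinion:", head_token.form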
# Given a ParsedSentence object, and an aspect token acquired from it (as in the previous section)
# Get all of the dependants of that aspect token.
dependants = parsed_sentence.get_dependants(aspect_token)
# You could print them out for inspection
for dependant in dependants:
    print dependant
aspect = "dialogue"
# If you have a ParsedSentence object, you can get all the tokens whose form matches the aspect as shown below.
# So instead of just printing the parsed_sentence as in the previous section, get its aspect tokens and print them.
aspect_tokens = parsed_sentence.get_query_tokens(aspect)
# You could iterate over them and print them for inspection
for aspect_token in aspect_tokens:
    print aspect_token
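# Putting the two ideas together (a sketch): for every occurrence of the aspect
# word, list its dependants along with their dependency relations to it.
for aspect_token in aspect_tokens:
    for dependant in parsed_sentence.get_dependants(aspect_token):
        print dependant.deprel, dependant.form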
This shows you how to search the Amazon DVD reviews for a particular query token.
from sussex_nltk.parse import load_parsed_dvd_sentences
aspect = "dialogue" # Our aspect word
parsed_sentences = load_parsed_dvd_sentences(aspect)
# To inspect the sentences, you could print them straight out
for parsed_sentence in parsed_sentences:
    print "--- Sentence ---"
    print parsed_sentence
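# To focus on the aspect tokens themselves rather than whole sentences (a sketch
# that reloads the sentences in case the loader returns a one-shot generator):
for parsed_sentence in load_parsed_dvd_sentences(aspect):
    for aspect_token in parsed_sentence.get_query_tokens(aspect):
        print aspect_token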