Last active
April 23, 2021 05:53
-
-
Save nivir/64173b23a71a1a32d7245a9830926d0c to your computer and use it in GitHub Desktop.
spacy_example.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# https://spacy.io/usage/spacy-101
import spacy

# Load the small English pipeline once; `nlp` and `doc` are module-level
# globals shared by the demo functions below.
nlp = spacy.load("en_core_web_sm")
# NOTE: unicode() was removed in Python 3 (str literals are already Unicode),
# and spaCy v2+ only supports Python 3, so the wrapper is dropped.
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
def fun_annotations():
    """Print text, coarse POS tag, and dependency label for every token,
    then the full document text."""
    for token in doc:
        print(token.text, token.pos_, token.dep_)
    # Fixed: `print doc` is Python-2 statement syntax — a SyntaxError on
    # Python 3 and inconsistent with the print() calls used elsewhere here.
    print(doc)
def fun_tokenizes():
    """Print each token's surface text, one token per line."""
    for tok in doc:
        text = tok.text
        print(text)
def fun_part_of_speech():
    """Print the full linguistic annotation set for every token:
    text, lemma, coarse/fine POS, dependency, shape, alpha and stop flags."""
    for tok in doc:
        attrs = (
            tok.text,
            tok.lemma_,
            tok.pos_,
            tok.tag_,
            tok.dep_,
            tok.shape_,
            tok.is_alpha,
            tok.is_stop,
        )
        print(*attrs)
def fun_named_entities():
    """Print each named entity with its character offsets and label."""
    for entity in doc.ents:
        span_info = (entity.text, entity.start_char, entity.end_char)
        print(*span_info, entity.label_)
def fun_word_vector():
    """Show vector availability, vector norm, and out-of-vocabulary status
    for a few example tokens (including one nonsense word)."""
    # Fixed: unicode() was removed in Python 3; a plain str literal is
    # already Unicode.
    tokens = nlp("dog cat banana afskfsd")
    for token in tokens:
        print(token.text, token.has_vector, token.vector_norm, token.is_oov)
def fun_word_vector_2():
    """Compare document-to-document and span-to-token similarity using a
    model that ships with word vectors."""
    # Fixed: removed a dead `pass` statement and the Python-2 unicode()
    # wrappers (removed in Python 3).
    nlp = spacy.load("en_core_web_md")  # make sure to use larger package!
    doc1 = nlp("I like salty fries and hamburgers.")
    doc2 = nlp("Fast food tastes very good.")
    # Similarity of two documents
    print(doc1, "<->", doc2, doc1.similarity(doc2))
    # Similarity of tokens and spans
    french_fries = doc1[2:4]
    burgers = doc1[5]
    print(french_fries, "<->", burgers, french_fries.similarity(burgers))
def fun_voab_hashes_lexemes():
    """Demonstrate the string-to-hash and hash-to-string round trip in the
    shared StringStore."""
    nlp = spacy.load("en_core_web_sm")
    # Fixed: unicode() was removed in Python 3; str is already Unicode.
    doc = nlp("I love coffee")
    print(doc.vocab.strings["coffee"])  # 3197928453018144401
    print(doc.vocab.strings[3197928453018144401])  # 'coffee'
def fun_voab_hashes_lexemes_2():
    """Print lexeme-level (context-independent) attributes for every token
    in the module-level `doc`."""
    for token in doc:
        lex = doc.vocab[token.text]
        fields = (
            lex.text,
            lex.orth,
            lex.shape_,
            lex.prefix_,
            lex.suffix_,
            lex.is_alpha,
            lex.is_digit,
            lex.is_title,
            lex.lang_,
        )
        print(*fields)
def fun_voab_hashes_lexemes_3_wrong():
    """Show that string hashes only resolve when the owning Vocab knows the
    string: a fresh empty Vocab cannot reverse a hash until the string is
    added, while a Doc sharing the first doc's Vocab resolves it directly."""
    from spacy.tokens import Doc
    from spacy.vocab import Vocab

    print(doc.vocab.strings["coffee"])  # 3197928453018144401
    print(doc.vocab.strings[3197928453018144401])  # 'coffee' 👍
    empty_doc = Doc(Vocab())  # New Doc with empty Vocab
    # empty_doc.vocab.strings[3197928453018144401] will raise an error :(
    empty_doc.vocab.strings.add("coffee")  # Add "coffee" and generate hash
    print(empty_doc.vocab.strings[3197928453018144401])  # 'coffee' 👍
    new_doc = Doc(doc.vocab)  # Create new doc with first doc's vocab
    print(new_doc.vocab.strings[3197928453018144401])  # 'coffee' 👍
#fun_annotations() | |
#fun_tokenizes() | |
#fun_part_of_speech() | |
#fun_named_entities() | |
#fun_word_vector() | |
#fun_word_vector_2() | |
#fun_voab_hashes_lexemes() | |
#fun_voab_hashes_lexemes_2() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment