Created
September 27, 2019 21:17
-
-
Save nlittlepoole/cd48ff2b261c8889f88f2467569debbb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Quickstart: run the pre-trained 'explain_document_dl' Spark NLP pipeline
# on a short text and print the annotation keys and the extracted entities.

# Import Spark NLP
from sparknlp.base import *
from sparknlp.annotator import *
from sparknlp.embeddings import *
# Imported by name so that PretrainedPipeline does not depend on whatever the
# wildcard imports above happen to re-export.
from sparknlp.pretrained import PretrainedPipeline
import sparknlp

# Start Spark Session with Spark NLP
spark = sparknlp.start()

# Download a pre-trained pipeline
pipeline = PretrainedPipeline('explain_document_dl', lang='en')

# Your testing dataset
text = """
The Mona Lisa is a 16th century oil painting created by Leonardo.
It's held at the Louvre in Paris.
"""

# Annotate your testing dataset
result = pipeline.annotate(text)

# What's in the pipeline
# (printed explicitly so the result is visible when run as a script,
# not only in a REPL/notebook)
print(list(result.keys()))
# Output: ['entities', 'stem', 'checked', 'lemma', 'document',
#          'pos', 'token', 'ner', 'embeddings', 'sentence']

# Check the results
print(result['entities'])
# Output: ['Mona Lisa', 'Leonardo', 'Louvre', 'Paris']
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment