-
-
Save yoptar/bd5d738861c52f88d24bc1db300d956a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import json\n", | |
"from deeppavlov import configs, build_model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Read the stock ner_ontonotes_bert_mult config into a plain dict so later cells can edit it.\n", | |
"with configs.ner.ner_ontonotes_bert_mult.open(encoding='utf8') as config_file:\n", | |
"    ner_config = json.load(config_file)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Extra pipeline component: reads 'x_tokens' and writes 'x_lemmas'.\n", | |
"lemmatizer_step = {\n", | |
"    'class_name': 'pymorphy_russian_lemmatizer',\n", | |
"    'in': ['x_tokens'],\n", | |
"    'out': ['x_lemmas']\n", | |
"}\n", | |
"pipe = ner_config['chainer']['pipe']\n", | |
"# Guard the append so that re-running this cell does not add the component twice.\n", | |
"if lemmatizer_step not in pipe:\n", | |
"    pipe.append(lemmatizer_step)\n", | |
"# Return tokens and lemmas alongside the predicted tags.\n", | |
"ner_config['chainer']['out'] = ['x_tokens', 'x_lemmas', 'y_pred']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"2019-11-14 11:48:59.400 INFO in 'deeppavlov.download'['download'] at line 118: Skipped http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12.zip download because of matching hashes\n", | |
"2019-11-14 11:49:01.167 INFO in 'deeppavlov.download'['download'] at line 118: Skipped http://files.deeppavlov.ai/deeppavlov_data/ner_ontonotes_bert_mult_v1.tar.gz download because of matching hashes\n", | |
"[nltk_data] Downloading package punkt to /home/yoptar/nltk_data...\n", | |
"[nltk_data] Package punkt is already up-to-date!\n", | |
"[nltk_data] Downloading package stopwords to /home/yoptar/nltk_data...\n", | |
"[nltk_data] Package stopwords is already up-to-date!\n", | |
"[nltk_data] Downloading package perluniprops to\n", | |
"[nltk_data] /home/yoptar/nltk_data...\n", | |
"[nltk_data] Package perluniprops is already up-to-date!\n", | |
"[nltk_data] Downloading package nonbreaking_prefixes to\n", | |
"[nltk_data] /home/yoptar/nltk_data...\n", | |
"[nltk_data] Package nonbreaking_prefixes is already up-to-date!\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/venv37/lib/python3.7/site-packages/bert_dp/tokenization.py:125: The name tf.gfile.GFile is deprecated. Please use tf.io.gfile.GFile instead.\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"2019-11-14 11:49:03.788 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 113: [loading vocabulary from /data/deeppavlov_data/models/ner_ontonotes_bert_mult/tag.dict]\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/deeppavlov/core/models/tf_model.py:37: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.\n", | |
"\n", | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/deeppavlov/core/models/tf_model.py:222: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.\n", | |
"\n", | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/deeppavlov/core/models/tf_model.py:222: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.\n", | |
"\n", | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/deeppavlov/core/models/tf_model.py:193: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead.\n", | |
"\n", | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/deeppavlov/models/bert/bert_ner.py:124: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.\n", | |
"\n", | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/deeppavlov/models/bert/bert_ner.py:225: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Using TensorFlow backend.\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/venv37/lib/python3.7/site-packages/bert_dp/modeling.py:178: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.\n", | |
"\n", | |
"WARNING:tensorflow:\n", | |
"The TensorFlow contrib module will not be included in TensorFlow 2.0.\n", | |
"For more information, please see:\n", | |
" * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n", | |
" * https://github.com/tensorflow/addons\n", | |
" * https://github.com/tensorflow/io (for I/O related ops)\n", | |
"If you depend on functionality not listed there, please file an issue.\n", | |
"\n", | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/venv37/lib/python3.7/site-packages/bert_dp/modeling.py:366: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n", | |
"Instructions for updating:\n", | |
"Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n", | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/venv37/lib/python3.7/site-packages/bert_dp/modeling.py:680: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.\n", | |
"Instructions for updating:\n", | |
"Use keras.layers.dense instead.\n", | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/venv37/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n", | |
"Instructions for updating:\n", | |
"Call initializer instance with the dtype argument instead of passing it to the constructor\n", | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/deeppavlov/models/bert/bert_ner.py:348: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n", | |
"Instructions for updating:\n", | |
"Use tf.where in 2.0, which has the same broadcast rule as np.where\n", | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/venv37/lib/python3.7/site-packages/tensorflow/contrib/crf/python/ops/crf.py:213: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.\n", | |
"Instructions for updating:\n", | |
"Please use `keras.layers.RNN(cell)`, which is equivalent to this API\n", | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/deeppavlov/core/models/tf_model.py:234: The name tf.train.AdadeltaOptimizer is deprecated. Please use tf.compat.v1.train.AdadeltaOptimizer instead.\n", | |
"\n", | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/venv37/lib/python3.7/site-packages/tensorflow/python/training/moving_averages.py:433: Variable.initialized_value (from tensorflow.python.ops.variables) is deprecated and will be removed in a future version.\n", | |
"Instructions for updating:\n", | |
"Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.\n", | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/deeppavlov/models/bert/bert_ner.py:137: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.\n", | |
"Instructions for updating:\n", | |
"Use standard file APIs to check for files with this prefix.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"2019-11-14 11:49:21.564 INFO in 'deeppavlov.core.models.tf_model'['tf_model'] at line 51: [loading model from /data/deeppavlov_data/models/ner_ontonotes_bert_mult/model]\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"WARNING:tensorflow:From /home/yoptar/reps/DeepPavlov/deeppavlov/core/models/tf_model.py:54: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.\n", | |
"\n", | |
"INFO:tensorflow:Restoring parameters from /data/deeppavlov_data/models/ner_ontonotes_bert_mult/model\n" | |
] | |
} | |
], | |
"source": [ | |
"# Build the pipeline from the edited config dict (not from the config file on disk).\n", | |
"ner_model = build_model(ner_config, download=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data = 'Президент Российской Федерации встретился с президентом Соединенных Штатов Америки.'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# The model is called with a list of texts; the three results follow the order set in\n", | |
"# ner_config['chainer']['out']: x_tokens, x_lemmas, y_pred.\n", | |
"tokens, lemmas, tags = ner_model([data])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Президент президент O\n", | |
"Российской российский B-GPE\n", | |
"Федерации федерация I-GPE\n", | |
"встретился встретиться O\n", | |
"с с O\n", | |
"президентом президент O\n", | |
"Соединенных соединить B-GPE\n", | |
"Штатов штат I-GPE\n", | |
"Америки америка I-GPE\n", | |
". . O\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"# Print one 'token lemma tag' row per token, with a blank line after each input text.\n", | |
"# The loop variables have their own names so the batch-level tokens/lemmas/tags are\n", | |
"# not overwritten and this cell gives the same output when re-run.\n", | |
"for sent_tokens, sent_lemmas, sent_tags in zip(tokens, lemmas, tags):\n", | |
"    for token, lemma, tag in zip(sent_tokens, sent_lemmas, sent_tags):\n", | |
"        print(token, lemma, tag)\n", | |
"    print()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "DeepPavlov", | |
"language": "python", | |
"name": "deeppavlov" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.1" | |
}, | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"state": {}, | |
"version_major": 2, | |
"version_minor": 0 | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment