Skip to content

Instantly share code, notes, and snippets.

@yoptar
Created November 14, 2019 08:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yoptar/bd5d738861c52f88d24bc1db300d956a to your computer and use it in GitHub Desktop.
Save yoptar/bd5d738861c52f88d24bc1db300d956a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"from deeppavlov import configs, build_model"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Read the `ner_ontonotes_bert_mult` config into `ner_config` so it can be edited before the model is built.\n",
"with configs.ner.ner_ontonotes_bert_mult.open(encoding='utf8') as config_file:\n",
"    ner_config = json.load(config_file)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Extra pipeline step: reads `x_tokens` and writes `x_lemmas`.\n",
"lemmatizer_step = {\n",
"    'class_name': 'pymorphy_russian_lemmatizer',\n",
"    'in': ['x_tokens'],\n",
"    'out': ['x_lemmas']\n",
"}\n",
"\n",
"# Guard the append so that re-running this cell does not add the same step twice.\n",
"if lemmatizer_step not in ner_config['chainer']['pipe']:\n",
"    ner_config['chainer']['pipe'].append(lemmatizer_step)\n",
"\n",
"# The model now returns tokens and lemmas alongside `y_pred`.\n",
"ner_config['chainer']['out'] = ['x_tokens', 'x_lemmas', 'y_pred']"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Build the pipeline from the edited config; with download=True DeepPavlov checks for and fetches the files the config needs.\n",
"# This call logs download status and many TensorFlow deprecation warnings; that log output is cleared from the saved notebook.\n",
"ner_model = build_model(ner_config, download=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Sample Russian sentence fed to the model in the next cell.\n",
"data = (\n",
"    'Президент Российской Федерации встретился '\n",
"    'с президентом Соединенных Штатов Америки.'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Run the model on a one-sentence batch and unpack its three outputs (tokens, lemmas, tags).\n",
"batch = [data]\n",
"tokens, lemmas, tags = ner_model(batch)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Президент президент O\n",
"Российской российский B-GPE\n",
"Федерации федерация I-GPE\n",
"встретился встретиться O\n",
"с с O\n",
"президентом президент O\n",
"Соединенных соединить B-GPE\n",
"Штатов штат I-GPE\n",
"Америки америка I-GPE\n",
". . O\n",
"\n"
]
}
],
"source": [
"# Print one `token lemma tag` line per token.\n",
"# Distinct loop names keep the batch-level `tokens`, `lemmas` and `tags` intact, so this cell can be re-run.\n",
"for sent_tokens, sent_lemmas, sent_tags in zip(tokens, lemmas, tags):\n",
"    for token, lemma, tag in zip(sent_tokens, sent_lemmas, sent_tags):\n",
"        print(token, lemma, tag)\n",
"    print()  # blank line after each sentence"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "DeepPavlov",
"language": "python",
"name": "deeppavlov"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment