This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Тренировка BERT с заданной конфигурацией на небольшом корпусе
"""
import numpy as np | |
import keras | |
import os | |
import collections | |
import pickle |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Использование модели BERT, натренированной кодом train_bert.py, в качестве энкодера
в автоэнкодерной модели.
Для экспериментов по изучению зависимости качества декодирования от сложности BERT.
"""
import random | |
import numpy as np |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Эксперимент с моделью несимметричного автоэнкодера с энкодером на базе претренированной модели BERT
""" | |
import io | |
import os | |
import random | |
import numpy as np | |
import sklearn.model_selection |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io | |
import itertools | |
import pickle | |
import collections | |
import glob | |
import os | |
import tqdm | |
import torch | |
from transformers import GPT2LMHeadModel, GPT2Tokenizer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Генерация датасета для интерпретации вопросов:
- что анализирую?
- изображение | я анализирую изображение
30-08-2020 Добавлена проверка, что сгенерированный сэмпл не имеется в датасетах qa.txt и interpretations.txt
28-04-2021 Доработки для получения сырого датасета из assertions_1s.txt
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Эксперимент с визуализацией эмбеддингов токенов в rugpt.
""" | |
import os | |
import io | |
import collections | |
import torch | |
import transformers |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Автоматическая оценка точности генерации цифрового ответа для p(0)qa сэмплов.
13.10.2022 Добавил валидацию модели на T5
""" | |
import os | |
import io | |
import re | |
import time |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Файнтюн rugpt на датасете перефразировок с использованием GPT2DoubleHeadsModel (https://huggingface.co/docs/transformers/model_doc/gpt2#transformers.GPT2DoubleHeadsModel)
Для проектов чатбота и генеративных стихов.
Используется датасет перефразировок из проекта чатбота с добавленными сэмплами проекта генеративных стихов.
В качестве дистракторов используем негативные примеры перефразировок из этого же датасета плюс рандомные выборки.
04.01.2023 Заранее подготовленный датасет загружаем из paraphrases.json (см. публичную версию https://huggingface.co/datasets/inkoziev/paraphrases)
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Эксперимент с файнтюном: токены исходного текста не включаем в backprop, присваивая соответствующим целям (labels) значение -100
""" | |
import os | |
import json | |
import io | |
import random | |
import itertools |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io | |
import argparse | |
import logging | |
import random | |
from flask import Flask, request | |
from flask import jsonify | |
import transformers | |
from kandinsky2 import get_kandinsky2 |