Find a better version here: https://benjcunningham.org/installing-transformers-on-jetson-nano.html
git clone https://github.com/google/sentencepiece
cd /path/to/sentencepiece
mkdir build
cd build
cmake ..
make -j $(nproc)
[PAD] | |
[UNK] | |
[CLS] | |
[SEP] | |
[MASK] | |
! | |
" | |
% | |
' | |
, |
import json | |
import sys | |
from tqdm import tqdm | |
from tqdm.contrib.concurrent import thread_map | |
import librosa | |
import random | |
print("python create_manifest.py script_path create_train_test_bool(True/False)") | |
script_path = sys.argv[1] |
# It contains the default values for training a Conformer-Transducer ASR model, large size (~120M) with Transducer loss and sub-word encoding. | |
# Architecture and training config: | |
# Default learning parameters in this config are set for effective batch size of 2K. To train it with smaller effective | |
# batch sizes, you may need to re-tune the learning parameters or use higher accumulate_grad_batches. | |
# Here are the recommended configs for different variants of Conformer-Transducer, other parameters are the same as in this config file. | |
# | |
# +-------------+---------+---------+----------+--------------+--------------------------+ | |
# | Model | d_model | n_heads | n_layers | weight_decay | pred_hidden/joint_hidden | | |
# +=============+=========+=========+==========+==============+==========================+ |
#!/usr/bin/env python | |
# coding=utf-8 | |
# Copyright 2021 The HuggingFace Inc. team. All rights reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# |
import torch | |
from transformers import Speech2Text2Processor, SpeechEncoderDecoderModel | |
from torch.utils.data import Dataset | |
from transformers import AutoFeatureExtractor, AutoTokenizer, SpeechEncoderDecoderModel | |
from torch.utils.data import DataLoader | |
from transformers import TrainingArguments, Trainer | |
import librosa | |
import argparse |
## Extended from https://github.com/huggingface/transformers/blob/master/notebooks/04-onnx-export.ipynb | |
from transformers import DistilBertTokenizerFast,DistilBertModel | |
from torch.cuda import get_device_name | |
from contextlib import contextmanager | |
from dataclasses import dataclass | |
from time import time | |
from tqdm import trange | |
from os import environ |
min_trimmed_length = 3 | |
min_word_count = 1 | |
max_word_count = 16 | |
min_characters = 2 | |
may_end_with_colon = false | |
quote_start_with_letter = true | |
needs_punctuation_end = false | |
needs_letter_start = true | |
allowed_symbols_regex = "[।-,;: \\-\\?\\.!]" | |
needs_uppercase_start = false |
## Extended from https://github.com/huggingface/transformers/blob/master/notebooks/04-onnx-export.ipynb | |
from transformers import DistilBertTokenizerFast,DistilBertModel | |
from torch.cuda import get_device_name | |
from contextlib import contextmanager | |
from dataclasses import dataclass | |
from time import time | |
from tqdm import trange | |
from os import environ |
Find a better version here: https://benjcunningham.org/installing-transformers-on-jetson-nano.html
git clone https://github.com/google/sentencepiece
cd /path/to/sentencepiece
mkdir build
cd build
cmake ..
make -j $(nproc)
Query:Covid risks in diabetic patients? +------+----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Rank | Answer | Doc Id | +------+----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | 1 | severe complications | b696d208705fcb1925693c5f0d118733bb557ea6_18 | Exploring diseases/traits and blood proteins causally related to expression of ACE2, the putative receptor of 2019-nCov: A Mendelian Randomization analysis | | 2