@guillaume-be
Created December 16, 2020 07:32
marian_model_loop.rs
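
A self-contained reproduction snippet: each pass through an endless loop resolves the pretrained English-to-Russian Marian resources, rebuilds the tokenizer and the MarianForConditionalGeneration model, tokenizes a fixed input sentence, runs a single forward pass, and prints the iteration count. The gist itself states no purpose; the repeated reload appears intended to exercise model construction in a loop (for example, to observe memory behaviour over many iterations).
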
// Copyright 2018-2020 The HuggingFace Inc. team.
// Copyright 2020 Marian Team Authors
// Copyright 2019-2020 Guillaume Becquin
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
extern crate anyhow;

use rust_bert::bart::BartConfig;
use rust_bert::marian::{
    MarianConfigResources, MarianForConditionalGeneration, MarianModelResources,
    MarianSpmResources, MarianVocabResources,
};
use rust_bert::resources::{RemoteResource, Resource};
use rust_bert::Config;
use rust_tokenizers::tokenizer::{MarianTokenizer, MultiThreadedTokenizer, TruncationStrategy};
use tch::{nn, no_grad, Device, Tensor};

fn main() -> anyhow::Result<()> {
    let mut i = 0;
    loop {
        // Resolve the English -> Russian Marian resources (downloaded on first use, cached locally)
        let model_resource = Resource::Remote(RemoteResource::from_pretrained(
            MarianModelResources::ENGLISH2RUSSIAN,
        ));
        let vocab_resource = Resource::Remote(RemoteResource::from_pretrained(
            MarianVocabResources::ENGLISH2RUSSIAN,
        ));
        let merge_resource = Resource::Remote(RemoteResource::from_pretrained(
            MarianSpmResources::ENGLISH2RUSSIAN,
        ));
        let config_resource = Resource::Remote(RemoteResource::from_pretrained(
            MarianConfigResources::ENGLISH2RUSSIAN,
        ));
        let weights_path = model_resource.get_local_path().unwrap();
        let vocab_path = vocab_resource.get_local_path().unwrap();
        let spm_path = merge_resource.get_local_path().unwrap();
        let config_path = config_resource.get_local_path().unwrap();

        // Build the tokenizer and the model; Marian reuses the BART configuration structure
        let tokenizer = MarianTokenizer::from_files(
            vocab_path.to_str().unwrap(),
            spm_path.to_str().unwrap(),
            false,
        )?;
        let config = BartConfig::from_file(&config_path);
        let device = Device::cuda_if_available();
        let mut vs = nn::VarStore::new(device);
        let model = MarianForConditionalGeneration::new(&vs.root(), &config, true);
        vs.load(weights_path)?;

        // Define input
        let input = ["One two three four"];
        let tokenized_input =
            tokenizer.encode_list(input, 1024, &TruncationStrategy::LongestFirst, 0);

        // Pad every sequence to the longest one and stack into a single batch tensor
        let max_len = tokenized_input
            .iter()
            .map(|input| input.token_ids.len())
            .max()
            .unwrap();
        let tokenized_input = tokenized_input
            .iter()
            .map(|input| input.token_ids.clone())
            .map(|mut input| {
                input.extend(vec![0; max_len - input.len()]);
                input
            })
            .map(|input| Tensor::of_slice(&input))
            .collect::<Vec<_>>();
        let input_tensor = Tensor::stack(tokenized_input.as_slice(), 0).to(device);

        // Forward pass without gradient tracking, feeding the same tensor as encoder and decoder input
        let model_output = no_grad(|| {
            model.forward_t(
                Some(&input_tensor),
                None,
                None,
                Some(&input_tensor),
                None,
                None,
                false,
            )
        });

        // Print the iteration counter and start over
        println!("{}", i);
        i += 1;
    }
    // Unreachable: the loop above never breaks
    Ok(())
}
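
Building this as written assumes the rust-bert, rust_tokenizers, tch, and anyhow crates as dependencies, plus a libtorch installation for tch; the crate versions are not pinned in the gist and would need to match the late-2020 API used here (Resource::Remote, MarianTokenizer::from_files, and so on). Note that the downloaded weights are cached on disk after the first iteration, but the VarStore, model, and tokenizer are reconstructed from scratch on every pass through the loop.
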