Skip to content

Instantly share code, notes, and snippets.

@marcossilva
marcossilva / init_model.py
Created December 9, 2022 20:50
Initialize GecBERTModel using pre-trained model weights
from gector.gec_model import GecBERTModel
VOCAB_PATH = "test_fixtures/roberta_model/vocabulary"
MODEL_PATH = "test_fixtures/roberta_model/weights.th"
# Initialize model
model = GecBERTModel(
vocab_path=VOCAB_PATH,
model_paths=[MODEL_PATH],
max_len=50,
min_len=3,
iterations=5,
@marcossilva
marcossilva / download_weights.py
Last active December 9, 2022 20:43
Download the weights of the pre-trained GECToR model
from pathlib import Path
import requests
from tqdm import tqdm
TEST_FIXTURES_DIR_PATH = "test_fixtures"
MODEL_URL = "https://grammarly-nlp-data-public.s3.amazonaws.com/gector/roberta_1_gectorv2.th"
def download_weights():
model_path = Path("test_fixtures/roberta_model/weights.th")
@marcossilva
marcossilva / icml_scrapper.py
Created August 9, 2021 02:30
Scraper to extract the title, authors, and direct link of ICML 2021 papers
# We use the requests lib to make the HTTP GET request
import requests
r = requests.get("https://icml.cc/Conferences/2021/Schedule?type=Poster")
# And the BeautifulSoup lib to parse the HTML data
from bs4 import BeautifulSoup
soup = BeautifulSoup(r.text, 'html.parser')
# I used select_one with a CSS selector to get the element with class 'col-xs-12',
# and then select to obtain all the divs that have an onClick attribute
import boto3
import sagemaker
from sagemaker.amazon.amazon_estimator import get_image_uri
import json
import glob
sess = sagemaker.Session()
bucket = 'workspace-foguetes'
@marcossilva
marcossilva / plots.py
Created August 20, 2018 14:24
Plotly Boxplot, Violin and Scatter Plot Simple Examples
import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 500)
py.init_notebook_mode(connected=True)
# Produce two random data series to plot
y0 = np.random.randn(50)-1