Skip to content

Instantly share code, notes, and snippets.

# Length limit constant; its consumers are not visible in this excerpt
# (presumably the tokenized-sequence cap -- TODO confirm against the dataset code).
MAX_LEN = 100 # max is 512 for BERT
class text_dataset(Dataset):
def __init__(self, X, y):
self.X = X
self.y = y
def __getitem__(self,index):
from pytorch_transformers import BertConfig
from pytorch_transformers import BertModel
# Build the BERT configuration object from explicit hyper-parameters
# (an integer passed as vocab_size_or_config_json_file, plus layer/head/width sizes).
config = BertConfig(
    vocab_size_or_config_json_file=32000,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
)
class BertForSequenceClassification(nn.Module):
def __init__(self, num_labels=2):
super(BertForSequenceClassification, self).__init__()
self.num_labels = num_labels
from pytorch_transformers import BertTokenizer
# Load the tokenizer registered under the 'bert-base-uncased' pretrained name.
tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path='bert-base-uncased')
input_text = "this is a piece of feedback"
# Split the raw text into tokens.
tokenized = tokenizer.tokenize(input_text)
# Convert the tokens to ids. The token list is bound to `tokenized` above;
# the previous call referenced the undefined name `tokenized_text` (NameError).
tokenizer.convert_tokens_to_ids(tokenized)
from setuptools import setup
# Declare the distribution to setuptools; `scripts` lists the two files
# shipped as scripts alongside the package data.
setup(
    name="multiTag_custom_package",
    version="0.1",
    scripts=[
        "utils/preprocess.py",
        "multiTag.py",
    ],
    include_package_data=True,
)
class Predictor(object):
"""Interface for constructing custom predictors."""
def predict(self, instances, **kwargs):
"""Performs custom prediction.
Instances are the decoded values from the request. They have already
been deserialized from JSON.
Args:
import pickle
import numpy as np
class CustomPredictor(object):
columns = ['UX_UI', 'Documentation', 'Performance', 'Bugs', 'Feature_Request',
'Price', 'Customer_Support', 'Onboarding', 'Reporting',
'Alerts_Notification', 'Value_Prop']
def __init__(self, models):
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.ensemble import RandomForestClassifier
import preprocess
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import nltk
# Download each required NLTK resource into /tmp (same order as before),
# then add /tmp to NLTK's data search path so the resources can be found.
for _resource in ('stopwords', 'punkt', 'averaged_perceptron_tagger', 'wordnet'):
    nltk.download(_resource, download_dir='/tmp')
nltk.data.path.append('/tmp')
import re
import string
["Alerts_Notification", "Bugs", "Customer_Support", "Documentation", "Feature_Request", "Onboarding", "Performance", "Price", "Reporting", "UX_UI", "Value_Prop"]

SHORTCUTS

Key/Command Description
Tab Auto-complete file and folder names
Ctrl + A Go to the beginning of the line you are currently typing on
Ctrl + E Go to the end of the line you are currently typing on
Ctrl + U Clear the line before the cursor
Ctrl + K Clear the line after the cursor
Ctrl + W Delete the word before the cursor
Ctrl + T Swap the last two characters before the cursor