Key/Command | Description |
---|---|
Tab | Auto-complete file and folder names |
Ctrl + A | Go to the beginning of the line you are currently typing on |
Ctrl + E | Go to the end of the line you are currently typing on |
Ctrl + U | Clear the line before the cursor |
Ctrl + K | Clear the line after the cursor |
Ctrl + W | Delete the word before the cursor |
Ctrl + T | Swap the last two characters before the cursor |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
MAX_LEN = 100 # max is 512 for BERT | |
class text_dataset(Dataset): | |
def __init__(self, X, y): | |
self.X = X | |
self.y = y | |
def __getitem__(self,index): | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pytorch_transformers import BertConfig | |
from pytorch_transformers import BertModel | |
config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, | |
num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) | |
class BertForSequenceClassification(nn.Module): | |
def __init__(self, num_labels=2): | |
super(BertForSequenceClassification, self).__init__() | |
self.num_labels = num_labels |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pytorch_transformers import BertTokenizer | |
tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path='bert-base-uncased') | |
input_text = "this is a piece of feedback" | |
tokenized = tokenizer.tokenize(input_text) | |
tokenizer.convert_tokens_to_ids(tokenized) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from setuptools import setup | |
setup( | |
name="multiTag_custom_package", | |
version="0.1", | |
include_package_data=True, | |
scripts=["utils/preprocess.py", "multiTag.py"] | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Predictor(object): | |
"""Interface for constructing custom predictors.""" | |
def predict(self, instances, **kwargs): | |
"""Performs custom prediction. | |
Instances are the decoded values from the request. They have already | |
been deserialized from JSON. | |
Args: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle | |
import numpy as np | |
class CustomPredictor(object): | |
columns = ['UX_UI', 'Documentation', 'Performance', 'Bugs', 'Feature_Request', | |
'Price', 'Customer_Support', 'Onboarding', 'Reporting', | |
'Alerts_Notification', 'Value_Prop'] | |
def __init__(self, models): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.pipeline import Pipeline | |
from sklearn.preprocessing import FunctionTransformer | |
from sklearn.ensemble import RandomForestClassifier | |
import preprocess | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.preprocessing import LabelEncoder | |
import pandas as pd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk | |
nltk.download('stopwords', download_dir='/tmp') | |
nltk.download('punkt', download_dir='/tmp') | |
nltk.download('averaged_perceptron_tagger', download_dir='/tmp') | |
nltk.download('wordnet', download_dir='/tmp') | |
nltk.data.path.append('/tmp') | |
import re | |
import string |
NewerOlder