yashvijay

## dataset.py
MAX_LEN = 100 # max is 512 for BERT

class text_dataset(Dataset):
    def __init__(self, X, y):

        self.X = X
        self.y = y

    def __getitem__(self,index):


## model.py
from pytorch_transformers import BertConfig
from pytorch_transformers import BertModel

config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
        num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072)

class BertForSequenceClassification(nn.Module):
    def __init__(self, num_labels=2):
        super(BertForSequenceClassification, self).__init__()
        self.num_labels = num_labels

## sample.py
from pytorch_transformers import BertTokenizer

# Load the tokenizer that belongs to the 'bert-base-uncased' checkpoint.
tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path='bert-base-uncased')
input_text = "this is a piece of feedback"
# Split the raw text into tokens, then map those tokens to vocabulary ids.
tokenized = tokenizer.tokenize(input_text)
# The token list is bound to `tokenized`; the previous `tokenized_text`
# was never defined and raised NameError.
tokenizer.convert_tokens_to_ids(tokenized)

## setup.py
from setuptools import setup

# Distribution metadata for the multiTag_custom_package package.
setup(
    name="multiTag_custom_package",
    version="0.1",
    include_package_data=True,
    # Stand-alone script files shipped with the distribution.
    scripts=[
        "utils/preprocess.py",
        "multiTag.py",
    ],
)

## template.py
class Predictor(object):
    """Interface for constructing custom predictors."""

    def predict(self, instances, **kwargs):
        """Performs custom prediction.

        Instances are the decoded values from the request. They have already
        been deserialized from JSON.

        Args:

## multiTag.py
import pickle
import numpy as np

class CustomPredictor(object):

    columns = ['UX_UI', 'Documentation', 'Performance', 'Bugs', 'Feature_Request',
       'Price', 'Customer_Support', 'Onboarding', 'Reporting',
       'Alerts_Notification', 'Value_Prop']

    def __init__(self, models):

## createModel.py
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.ensemble import RandomForestClassifier

import preprocess

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder

import pandas as pd

## preprocess.py
import nltk
# Fetch every NLTK resource this module relies on into /tmp, then register
# /tmp as a data directory so NLTK can locate the downloaded files.
for _resource in ('stopwords', 'punkt', 'averaged_perceptron_tagger', 'wordnet'):
    nltk.download(_resource, download_dir='/tmp')
nltk.data.path.append('/tmp')

import re
import string

## tags.py
["Alerts_Notification", "Bugs", "Customer_Support", "Documentation", "Feature_Request", "Onboarding", "Performance", "Price", "Reporting", "UX_UI", "Value_Prop"]

## gist:32299e3e231685ff535f6e3ae2207820

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                yashvijay
                / gist:32299e3e231685ff535f6e3ae2207820
            
            
              Created
              July 3, 2017 11:32
                — forked from itsmattsoria/gistfil1.textile
            
              
                Mac Terminal Cheat Sheet
              
          
    SHORTCUTS


		Key/Command 
		Description 
	
	
		 Tab 
		 Auto-complete files and folder names 
	
	
		 Ctrl + A 
		 Go to the beginning of the line you are currently typing on 
	
	
		 Ctrl + E 
		 Go to the end of the line you are currently typing on 
	
	
		 Ctrl + U 
		 Clear the line before the cursor 
	
	
		 Ctrl + K 
		 Clear the line after the cursor 
	
	
		 Ctrl + W 
		 Delete the word before the cursor 
	
	
		 Ctrl + T 
		 Swap the last two characters before the cursor
	MAX_LEN = 100 # max is 512 for BERT

	class text_dataset(Dataset):
	def __init__(self, X, y):

	self.X = X
	self.y = y

	def __getitem__(self,index):
	from pytorch_transformers import BertConfig
	from pytorch_transformers import BertModel

	config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
	num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072)

	class BertForSequenceClassification(nn.Module):
	def __init__(self, num_labels=2):
	super(BertForSequenceClassification, self).__init__()
	self.num_labels = num_labels
	from pytorch_transformers import BertTokenizer

	# Load the tokenizer that belongs to the 'bert-base-uncased' checkpoint.
	tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path='bert-base-uncased')
	input_text = "this is a piece of feedback"
	# Split the raw text into tokens, then map those tokens to vocabulary ids.
	tokenized = tokenizer.tokenize(input_text)
	# The token list is bound to `tokenized`; the previous `tokenized_text`
	# was never defined and raised NameError.
	tokenizer.convert_tokens_to_ids(tokenized)
	from setuptools import setup

	setup(
	name="multiTag_custom_package",
	version="0.1",
	include_package_data=True,
	scripts=["utils/preprocess.py", "multiTag.py"]
	)
	class Predictor(object):
	"""Interface for constructing custom predictors."""

	def predict(self, instances, **kwargs):
	"""Performs custom prediction.

	Instances are the decoded values from the request. They have already
	been deserialized from JSON.

	Args:
	import pickle
	import numpy as np

	class CustomPredictor(object):

	columns = ['UX_UI', 'Documentation', 'Performance', 'Bugs', 'Feature_Request',
	'Price', 'Customer_Support', 'Onboarding', 'Reporting',
	'Alerts_Notification', 'Value_Prop']

	def __init__(self, models):
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import FunctionTransformer
	from sklearn.ensemble import RandomForestClassifier

	import preprocess

	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.preprocessing import LabelEncoder

	import pandas as pd
	import nltk
	nltk.download('stopwords', download_dir='/tmp')
	nltk.download('punkt', download_dir='/tmp')
	nltk.download('averaged_perceptron_tagger', download_dir='/tmp')
	nltk.download('wordnet', download_dir='/tmp')
	nltk.data.path.append('/tmp')

	import re
	import string
Key/Command	Description
Tab	Auto-complete files and folder names
Ctrl + A	Go to the beginning of the line you are currently typing on
Ctrl + E	Go to the end of the line you are currently typing on
Ctrl + U	Clear the line before the cursor
Ctrl + K	Clear the line after the cursor
Ctrl + W	Delete the word before the cursor
Ctrl + T	Swap the last two characters before the cursor