nbertagnolli

## sklearn_lambda_handler.py
import boto3
import json
import os
import pickle


s3 = boto3.resource("s3")
BUCKET_NAME = "nic-sklearn-models"


## binance_price_scraper.js
const Apify = require('apify');
const axios = require('axios');

/**
 * Converts List of Lists of kline data from Binance to a list of
 * dictionaries.
 * @param  {[List[List[String]]]} data     The raw data returned from binance
 * @param  {[String]}             exchange The name of the exchange to scrape
 * @return {[Dict[String, String]]}
 */

## bert_emotions.ipynb

      
              1 file
            
          
              1 fork
            
          
              4 comments
            
          
              0 stars
            
          
                nbertagnolli
                / bert_emotions.ipynb
            
            
              Last active
              November 23, 2021 05:05
            
              
                Holds the code for https://towardsdatascience.com/build-a-bert-sci-kit-transformer-59d60ddd54a5
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## feature_importance.ipynb

      
              1 file
            
          
              2 forks
            
          
              0 comments
            
          
              1 star
            
          
                nbertagnolli
                / feature_importance.ipynb
            
            
              Created
              October 12, 2020 05:11
            
              
                Gist for medium article...
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## extract_feature_names.py
def extract_feature_names(model, name) -> List[str]:
  """Extracts the feature names from arbitrary sklearn models

  Args:
    model: The Sklearn model, transformer, clustering algorithm, etc. which we want to get named features for.
    name: The name of the current step in the pipeline we are at.

  Returns:
    The list of feature names.  If the model does not have named features it constructs feature names
    by appending an index to the provided name.

## get_feature_names.py
from sklearn.pipeline import FeatureUnion, Pipeline

def get_feature_names(model, names: List[str], name: str) -> List[str]:
    """This method extracts the feature names in order from a Sklearn Pipeline

    This method only works with composed Pipelines and FeatureUnions.  It will
    pull out all names using DFS from a model.

    Args:
        model: The model we are interested in

## language_transformer_fast.py
from typing import List, Optional, Set
from sklearn.base import BaseEstimator, TransformerMixin
import fasttext
from transformers import MarianTokenizer, MarianMTModel
import os
import requests

class LanguageTransformerFast(BaseEstimator, TransformerMixin):
    def __init__(
        self,

## english_transformer.py
from typing import List, Optional
from sklearn.base import BaseEstimator, TransformerMixin
import fasttext
from transformers import MarianTokenizer, MarianMTModel
import os

class EnglishTransformer(BaseEstimator, TransformerMixin):

    def __init__(self,
                 fasttext_model_path: str="/tmp/lid.176.bin",

## predict_language.py
from typing import List
import os
import requests
import fasttext


def get_language(texts: List[str]) -> List[str]:
    """Predicts the language code for each text in a list

    Args:

## split_on_date.py
def split_on_date(data: pd.DataFrame, train_percent: float=0.9, seed: int=1234):
  """Splits a DataFrame into train and validation sets based on the date.

  Args:
    data: The data we want to split.  It must contain a date column.
    train_percent: The percent of data to use for training
    seed: The random seed to use for selecting the sets

  Returns:
    data: A DataFrame with a new split column with values 'train' and 'val'.
	import boto3
	import json
	import os
	import pickle


	s3 = boto3.resource("s3")
	BUCKET_NAME = "nic-sklearn-models"
	const Apify = require('apify');
	const axios = require('axios');

	/**
	* Converts List of Lists of kline data from Binance to a list of
	* dictionaries.
	* @param {[List[List[String]]]} data The raw data returned from binance
	* @param {[String]} exchange The name of the exchange to scrape
	* @return {[Dict[String, String]]}
	*/
	def extract_feature_names(model, name) -> List[str]:
	"""Extracts the feature names from arbitrary sklearn models

	Args:
	model: The Sklearn model, transformer, clustering algorithm, etc. which we want to get named features for.
	name: The name of the current step in the pipeline we are at.

	Returns:
	The list of feature names. If the model does not have named features it constructs feature names
	by appending an index to the provided name.
	from sklearn.pipeline import FeatureUnion, Pipeline

	def get_feature_names(model, names: List[str], name: str) -> List[str]:
	"""This method extracts the feature names in order from a Sklearn Pipeline

	This method only works with composed Pipelines and FeatureUnions. It will
	pull out all names using DFS from a model.

	Args:
	model: The model we are interested in
	from typing import List, Optional, Set
	from sklearn.base import BaseEstimator, TransformerMixin
	import fasttext
	from transformers import MarianTokenizer, MarianMTModel
	import os
	import requests

	class LanguageTransformerFast(BaseEstimator, TransformerMixin):
	def __init__(
	self,
	from typing import List, Optional
	from sklearn.base import BaseEstimator, TransformerMixin
	import fasttext
	from transformers import MarianTokenizer, MarianMTModel
	import os

	class EnglishTransformer(BaseEstimator, TransformerMixin):

	def __init__(self,
	fasttext_model_path: str="/tmp/lid.176.bin",
	from typing import List
	import os
	import requests
	import fasttext


	def get_language(texts: List[str]) -> List[str]:
	"""Predicts the language code for each text in a list

	Args:
	def split_on_date(data: pd.DataFrame, train_percent: float=0.9, seed: int=1234):
	"""Splits a DataFrame into train and validation sets based on the date.

	Args:
	data: The data we want to split. It must contain a date column.
	train_percent: The percent of data to use for training
	seed: The random seed to use for selecting the sets

	Returns:
	data: A DataFrame with a new split column with values 'train' and 'val'.