gunicorn run:app --workers=9
gunicorn run:app --workers=9 --worker-class=meinheld.gmeinheld.MeinheldWorker
MacBook Pro 2015, Python 3.7

Framework | Server | Req/s | Max latency | +/- Stdev
---|---|---|---|---
""" | |
Single Responsibility Principle | |
“…You had one job” — Loki to Skurge in Thor: Ragnarok | |
A class should have only one job. | |
If a class has more than one responsibility, it becomes coupled. | |
A change to one responsibility results to modification of the other responsibility. | |
""" | |
class Animal: | |
def __init__(self, name: str): |
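The coupling described above is easiest to see by splitting persistence out of `Animal` into its own class. The sketch below is illustrative (the `AnimalDB` name and its methods are assumptions, not the article's full code): `Animal` keeps one job, holding animal properties, while storage changes can only ever touch `AnimalDB`.

```python
class Animal:
    """Holds only animal properties: a single responsibility."""
    def __init__(self, name: str):
        self.name = name

    def get_name(self) -> str:
        return self.name


class AnimalDB:
    """Handles only storage, so persistence changes never touch Animal."""
    def __init__(self):
        self._store = {}

    def save(self, animal: Animal) -> None:
        self._store[animal.name] = animal

    def get(self, name: str) -> Animal:
        return self._store[name]


db = AnimalDB()
db.save(Animal("lion"))
```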
""" | |
PyTorch has pack_padded_sequence this doesn’t work with dense layers. For sequence data with high variance in its length | |
the best way to minimize padding and masking within a batch is by feeding in data that is already grouped by sequence length | |
(while still shuffling it somewhat). Here is my current solution in numpy. | |
I will need to convert every function over to torch to allow it to run on the GPU and am sure there are many other | |
ways to optimize it further. Hope this helps others and that maybe it can become a new PyTorch Batch Sampler someday. | |
General approach to how it works: | |
Decide what your bucket boundaries for the data are. |
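The bucketing idea can be sketched in a few lines of numpy. This is a simplified stand-in for the post's solution, not the author's code; the bucket boundaries, batch size, and function name are illustrative:

```python
import numpy as np

def bucketed_batches(lengths, boundaries, batch_size, rng=None):
    """Group sample indices into length buckets, shuffle within each bucket,
    and yield batches whose members have similar sequence lengths."""
    if rng is None:
        rng = np.random.default_rng(0)
    lengths = np.asarray(lengths)
    bucket_ids = np.digitize(lengths, boundaries)  # bucket index per sample
    for b in np.unique(bucket_ids):
        idx = np.flatnonzero(bucket_ids == b)
        rng.shuffle(idx)                           # shuffle within the bucket
        for start in range(0, len(idx), batch_size):
            yield idx[start:start + batch_size]

lengths = [3, 5, 21, 22, 4, 30, 29, 6]
batches = list(bucketed_batches(lengths, boundaries=[10, 25], batch_size=2))
# every batch draws only from one length bucket, so padding per batch is small
```

Shuffling happens inside each bucket, which preserves some randomness while keeping the lengths within a batch close together.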
import numpy as np
import faiss

def search_knn(xq, xb, k, distance_type=faiss.METRIC_L2):
    """ wrapper around the faiss knn functions without index """
    nq, d = xq.shape
    nb, d2 = xb.shape
    assert d == d2
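The snippet is truncated here. For reference, the brute-force search such an index-free knn call performs can be sketched in plain numpy. This is a simplified stand-in covering only the L2 metric, not the faiss implementation:

```python
import numpy as np

def search_knn_numpy(xq, xb, k):
    """Brute-force k-nearest-neighbour search under squared-L2 distance."""
    # Expand ||q - b||^2 = ||q||^2 - 2 q.b + ||b||^2 to avoid explicit loops.
    sq_q = (xq ** 2).sum(axis=1, keepdims=True)   # (nq, 1)
    sq_b = (xb ** 2).sum(axis=1)                  # (nb,)
    dist = sq_q - 2.0 * xq @ xb.T + sq_b          # (nq, nb) distance matrix
    idx = np.argsort(dist, axis=1)[:, :k]         # k smallest per query
    return np.take_along_axis(dist, idx, axis=1), idx

xb = np.array([[0.0, 0.0], [1.0, 0.0], [5.0, 5.0]])
xq = np.array([[0.9, 0.1]])
dists, ids = search_knn_numpy(xq, xb, k=2)
# nearest base vector to (0.9, 0.1) is index 1, then index 0
```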
# Implementation of Structured Self-Attention mechanism
# from Lin et al. 2017 (https://arxiv.org/pdf/1703.03130.pdf)
# Anton Melnikov
import torch
import torch.nn as nn

class StructuredAttention(nn.Module):
    def __init__(self, *, input_dim: int, hidden_dim: int, attention_hops: int):
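The module body is truncated here. The mechanism from the paper computes an annotation matrix A = softmax(W2 tanh(W1 Hᵀ)) and a sentence embedding M = A H; a minimal numpy sketch of that computation (shapes and names chosen for illustration, not taken from this implementation) could look like:

```python
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def structured_attention(H, W1, W2):
    """Self-attentive embedding of Lin et al. 2017.

    H  : (seq_len, input_dim)       hidden states of the encoder
    W1 : (hidden_dim, input_dim)
    W2 : (attention_hops, hidden_dim)
    Returns A (hops, seq_len) and M = A @ H (hops, input_dim).
    """
    A = softmax(W2 @ np.tanh(W1 @ H.T), axis=-1)  # annotation matrix
    return A, A @ H

rng = np.random.default_rng(0)
H = rng.standard_normal((7, 4))    # seq_len=7, input_dim=4
W1 = rng.standard_normal((5, 4))   # hidden_dim=5
W2 = rng.standard_normal((3, 5))   # attention_hops=3
A, M = structured_attention(H, W1, W2)
# each of the 3 hops distributes attention weights over the 7 timesteps
```

Each hop's row of A sums to 1, so M stacks several differently-weighted summaries of the sequence.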
# from my repo
# https://github.com/AdityaSoni19031997/Machine-Learning/blob/master/AV/AV_Enigma_NLP_functional_api.ipynb
import re

def preprocess_word(word):
    # Remove punctuation
    word = word.strip('\'"?!,.():;')
    # Convert more than 2 letter repetitions to 2 letter
    # funnnnny --> funny
    word = re.sub(r'(.)\1+', r'\1\1', word)
    # Remove - & '
    word = re.sub(r"[-']", '', word)
    return word
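The repetition-collapsing regex is the interesting part: `(.)\1+` matches any run of a repeated character and rewrites it as exactly two copies. A standalone sanity check (not part of the original notebook):

```python
import re

def collapse_repeats(word: str) -> str:
    # Any run of 2+ identical characters is shortened to exactly two.
    return re.sub(r'(.)\1+', r'\1\1', word)

print(collapse_repeats('funnnnny'))   # funny
print(collapse_repeats('coooool'))    # cool
```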
model.zero_grad()                                   # Reset gradients tensors
for i, (inputs, labels) in enumerate(training_set):
    predictions = model(inputs)                     # Forward pass
    loss = loss_function(predictions, labels)       # Compute loss function
    loss = loss / accumulation_steps                # Normalize our loss (if averaged)
    loss.backward()                                 # Backward pass
    if (i+1) % accumulation_steps == 0:             # Wait for several backward steps
        optimizer.step()                            # Now we can do an optimizer step
        model.zero_grad()                           # Reset gradients tensors
        if (i+1) % evaluation_steps == 0:           # Evaluate the model when we...
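The effect of the `loss / accumulation_steps` normalization can be checked numerically: when the per-batch loss is a mean, summing the gradients of the normalized micro-batch losses reproduces the gradient of the mean loss over the full batch. A standalone numpy check on a linear least-squares model (independent of the training loop above):

```python
import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((8, 3))   # 8 samples, 3 features
y = rng.standard_normal(8)
w = rng.standard_normal(3)

def grad_mse(Xb, yb, w):
    """Gradient of the mean squared error 0.5*mean((Xb @ w - yb)**2) w.r.t. w."""
    return Xb.T @ (Xb @ w - yb) / len(yb)

# Gradient computed on the full batch at once.
full = grad_mse(X, y, w)

# Same gradient accumulated over 4 micro-batches of 2 samples,
# each micro-batch loss divided by accumulation_steps.
accumulation_steps = 4
acc = np.zeros_like(w)
for Xb, yb in zip(np.split(X, 4), np.split(y, 4)):
    acc += grad_mse(Xb, yb, w) / accumulation_steps
```

The two gradients match, which is why dividing by `accumulation_steps` keeps the effective learning rate independent of how many steps are accumulated.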
import pandas as pd

def mem_usage(pandas_obj):
    '''
    Estimates the memory usage of a pandas object.
    source: https://www.dataquest.io/blog/pandas-big-data/
    '''
    if isinstance(pandas_obj, pd.DataFrame):
        usage_b = pandas_obj.memory_usage(deep=True).sum()
    else:  # we assume if not a df it's a series
        usage_b = pandas_obj.memory_usage(deep=True)
    usage_mb = usage_b / 1024 ** 2  # convert bytes to megabytes
    return '{:03.2f} MB'.format(usage_mb)
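The `deep=True` flag is the important detail: for object columns, plain `memory_usage()` counts only the 8-byte pointers, while `deep=True` also measures the Python string payloads they point at. A small standalone illustration:

```python
import pandas as pd

# Force object dtype so the column stores Python string objects.
df = pd.DataFrame({'word': pd.Series(['pandas'] * 1000, dtype=object)})

shallow = df.memory_usage().sum()        # pointers only
deep = df.memory_usage(deep=True).sum()  # pointers plus string payloads
# deep accounting is strictly larger for object (string) columns
```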
-- Feature engineering BigQuery SQL queries for the kaggle talkingdata competition by tkm2261
-- it may achieve 0.9823 on the public LB with a simple GBDT.
-- destination table: talking.test2
SELECT
  CASE WHEN t.click_id IS NULL THEN -1 ELSE t.click_id END AS click_id,
  o.*
FROM
  `talking.test_supplement` AS o
LEFT OUTER JOIN