Sean Benhur seanbenhur

## summarizer_word_embedding.py
import nltk
import re
import string
from gensim.models import Word2Vec
from nltk.tokenize import sent_tokenize as nlkt_sent_tokenize
from nltk.tokenize import word_tokenize as nlkt_word_tokenize
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from nltk.corpus import stopwords
import numpy as np
from scipy.spatial.distance import cosine

## avoid_colab_close.js
//credit - https://huggingface.co/blog/fine-tune-wav2vec2-english (Patrick von Platen)

// Run this in your browser's developer console (e.g. Chrome DevTools), in the tab where Colab is open.

/**
 * Click the Colab "connect" button if it can be found on the page.
 *
 * The button lives inside the shadow DOM of the <colab-connect-button>
 * custom element, so it is looked up in two steps. If any step fails
 * (toolbar not rendered, no shadow root, button missing) the function
 * logs a warning and returns instead of throwing a TypeError.
 */
function ConnectButton(){
    var host = document.querySelector("#top-toolbar > colab-connect-button");
    // Guard each hop: querySelector returns null when nothing matches, and
    // shadowRoot is null for elements without an open shadow root.
    var button = host && host.shadowRoot && host.shadowRoot.querySelector("#connect");
    if (!button) {
        console.warn("Connect button not found; skipping click");
        return;
    }
    console.log("Connect pushed");
    button.click();
}

// Repeat every 60 seconds. The timer id is kept in `colab` so the loop can
// be stopped later with clearInterval(colab).
var colab = setInterval(ConnectButton, 60000);

## j6b_train_hf_ds.py
#  So now you want to finetune that GPT-J-6B on a 3090/TITAN GPU ... okay
#  More exploratory coding. It uses the Huggingface model port, deepspeed and reads all text/md files from a target directory
#  It is a fragment of a larger system with remote editing, but that's another story
#  This is the raw, training tester. Items to look out for:
#  - uses DeepSpeed and has a DS config
#  - to save space uses SGD instead of ADAM
#  - uses gradient checkpointing
#  - freezes 25% of the layers to fit

# Assumes you can already run https://gist.github.com/kinoc/2d636a68876cd3de7b6e9c9452b61089

## since2010.md

      
              1 file
            
          
              2 forks
            
          
              33 comments
            
          
              102 stars
            
          
                shawwn
                / since2010.md
            
            
              Created
              May 11, 2021 09:46
            
              
                "What happened after 2010?"
              
          
    This was a response to a Hacker News
comment asking me what
I've been up to since 2010. I'm posting it here since HN rejects it
with "that comment is too long." I suppose that's fair, since
this ended up being something of an autobiography.
--

What happened after 2010?


## streamlit_colab.ipynb

      
              1 file
            
          
              9 forks
            
          
              27 comments
            
          
              27 stars
            
          
                tuffacton
                / streamlit_colab.ipynb
            
            
              Last active
              March 7, 2024 05:47
            
              
                Colaboratory Notebook that hosts a streamlit app and creates an ngrok https tunnel for access.
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## dynamic_rnn.py
import torch
from torch import nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


class DynamicRNN(nn.Module):
	"""
	Wrapper around recurrent modules (RNN, LSTM) that supports
	batches of packed sequences.
	"""

## tfpdf.py
from __future__ import division
import string
import math

def tokenize(doc):
    """Lower-case *doc* and split it on single space characters.

    The separator is the literal ``" "``, so consecutive spaces produce
    empty-string tokens and punctuation stays attached to its word.
    """
    return doc.lower().split(" ")


# Sample documents (short news-style sentences) defined as module-level
# constants; presumably the input corpus for code further down the file.
document_0 = "China has a strong economy that is growing at a rapid pace. However politically it differs greatly from the US Economy."
document_1 = "At last, China seems serious about confronting an endemic problem: domestic violence and corruption."
document_2 = "Japan's prime minister, Shinzo Abe, is working towards healing the economic turmoil in his own country for his view on the future of his people."
document_3 = "Vladimir Putin is working hard to fix the economy in Russia as the Ruble has tumbled."
	import nltk
	import re
	import string
	from gensim.models import Word2Vec
	from nltk.tokenize import sent_tokenize as nlkt_sent_tokenize
	from nltk.tokenize import word_tokenize as nlkt_word_tokenize
	from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
	from nltk.corpus import stopwords
	import numpy as np
	from scipy.spatial.distance import cosine
	//credit - https://huggingface.co/blog/fine-tune-wav2vec2-english (Patrick von Platen)

	// Run this in your browser's developer console (e.g. Chrome DevTools), in the tab where Colab is open.

	/**
	 * Click the Colab "connect" button if it can be found on the page.
	 *
	 * The button lives inside the shadow DOM of the <colab-connect-button>
	 * custom element, so it is looked up in two steps. If any step fails
	 * (toolbar not rendered, no shadow root, button missing) the function
	 * logs a warning and returns instead of throwing a TypeError.
	 */
	function ConnectButton(){
		var host = document.querySelector("#top-toolbar > colab-connect-button");
		// Guard each hop: querySelector returns null when nothing matches, and
		// shadowRoot is null for elements without an open shadow root.
		var button = host && host.shadowRoot && host.shadowRoot.querySelector("#connect");
		if (!button) {
			console.warn("Connect button not found; skipping click");
			return;
		}
		console.log("Connect pushed");
		button.click();
	}

	// Repeat every 60 seconds. The timer id is kept in `colab` so the loop can
	// be stopped later with clearInterval(colab).
	var colab = setInterval(ConnectButton, 60000);
	# So now you want to finetune that GPT-J-6B on a 3090/TITAN GPU ... okay
	# More exploratory coding. It uses the Huggingface model port, deepspeed and reads all text/md files from a target directory
	# It is a fragment of a larger system with remote editing, but that's another story
	# This is the raw, training tester. Items to look out for:
	# - uses DeepSpeed and has a DS config
	# - to save space uses SGD instead of ADAM
	# - uses gradient checkpointing
	# - freezes 25% of the layers to fit

	# Assumes you can already run https://gist.github.com/kinoc/2d636a68876cd3de7b6e9c9452b61089
	import torch
	from torch import nn
	from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


	class DynamicRNN(nn.Module):
	"""
	Wrapper around recurrent modules (RNN, LSTM) that supports
	batches of packed sequences.
	"""
	from __future__ import division
	import string
	import math

	def tokenize(doc):
		"""Lower-case *doc* and split it on single space characters.

		The separator is the literal ``" "``, so consecutive spaces produce
		empty-string tokens and punctuation stays attached to its word.
		"""
		return doc.lower().split(" ")


	# Sample documents (short news-style sentences) defined as constants;
	# presumably the input corpus for code further down the file.
	document_0 = "China has a strong economy that is growing at a rapid pace. However politically it differs greatly from the US Economy."
	document_1 = "At last, China seems serious about confronting an endemic problem: domestic violence and corruption."
	document_2 = "Japan's prime minister, Shinzo Abe, is working towards healing the economic turmoil in his own country for his view on the future of his people."
	document_3 = "Vladimir Putin is working hard to fix the economy in Russia as the Ruble has tumbled."