Jonathan Larkin marketneutral

## privateGPT_uichat.py
# Disclaimer: This code was not written by me. It is taken from https://github.com/imartinez/privateGPT/pull/91.
# All credit goes to `vnk8071`, as I mentioned in the video.
# Since this code was still in a pull request while I was creating the video, I made some modifications so that it works for me locally.

import gradio as gr
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import RetrievalQA
from langchain.embeddings import LlamaCppEmbeddings
from langchain.llms import GPT4All, LlamaCpp
from langchain.vectorstores import Chroma

## pandas_tooltips.py
def make_tooltipped_df(df, tooltips: dict):
  """
  import pandas as pd
  from IPython.display import display, HTML

  # Sample DataFrame
  data = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
  df = pd.DataFrame(data)

  # Apply styles to the DataFrame

## list.md

      
              1 file
            
          
              804 forks
            
          
              38 comments
            
          
              1858 stars
            
          
                ih2502mk
                / list.md
            
            
              Last active
              July 29, 2024 23:14
            
              
                Quantopian Lectures Saved
              
          
    Lecture 1: Introduction to Research — [📝Lecture Notebooks] [▶️Video]

Lecture 2: Introduction to Python — [📝Lecture Notebooks] [▶️Video]

Lecture 3: Introduction to NumPy — [📝Lecture Notebooks] [▶️Video]

Lecture 4: Introduction to pandas — [📝Lecture Notebooks] [▶️Video]

Lecture 5: Plotting Data — [📝Lecture Notebooks] [▶️Video]

  
## pandasparapply.py
from joblib import Parallel, delayed

def expesive_calc(df):
    """Placeholder for an expensive per-group computation.

    Stores the ``Ellipsis`` literal under the ``'new_col'`` key of ``df``
    (standing in for the real calculation) and returns the same object,
    which is modified in place.
    """
    placeholder = ...
    df["new_col"] = placeholder
    return df

def apply_parallel(gdf, func, n_jobs=8):
    """Apply ``func`` to every group of ``gdf`` in parallel and concatenate the results.

    Args:
        gdf: Iterable yielding ``(name, group)`` pairs; presumably a pandas
            GroupBy object (assumption based on usage -- confirm with callers).
        func: Callable invoked once per group; its return values are handed
            to ``pd.concat``.
        n_jobs: Number of joblib workers. Defaults to 8, the value that was
            previously hard-coded.

    Returns:
        The result of ``pd.concat`` over the per-group results.
    """
    # Imported locally because this snippet never imports pandas at module level.
    import pandas as pd

    # The group name is not needed; only the group itself is passed to ``func``.
    tasks = (delayed(func)(group) for _, group in gdf)
    ret_list = Parallel(n_jobs=n_jobs)(tasks)
    return pd.concat(ret_list)

## install-docker.sh
# WARNING: This gist, in its current form, is a collection of command examples. Please exercise caution where mentioned.

# Docker
# Refresh the package index so the install below sees current package versions.
sudo apt-get update
# CAUTION: removes any Docker packages that are already installed.
sudo apt-get remove docker docker-engine docker.io
# Install Docker from the distribution's docker.io package.
sudo apt install docker.io
# Start the Docker service now...
sudo systemctl start docker
# ...and enable it so it also starts on boot.
sudo systemctl enable docker
# Print the installed version to confirm the installation worked.
docker --version

## kaggledays-sf_h2o_automl_6000.R
### Kaggle Days SF: Hackathon submission (8th place)

# I used the latest version of H2O (3.24.0.1)
# Latest stable always here: http://h2o-release.s3.amazonaws.com/h2o/latest_stable.html
# H2O 3.24.0.1: http://h2o-release.s3.amazonaws.com/h2o/rel-yates/1/index.html
# If you are a Python user, you can use the demo Python code available on the H2O AutoML User Guide
# instead: http://docs.h2o.ai/h2o/latest-stable/h2o-docs/automl.html

# Unfortunately it was a private competition, so the data is not publicly available!

## dask_sparse_corr.py
import dask
import dask.array as da
import dask.dataframe as dd
import sparse

@dask.delayed(pure=True)
def corr_on_chunked(chunk1, chunk2, corr_thresh=0.9):
    """Return a sparse boolean mask of entries whose dot product exceeds ``corr_thresh``.

    Computes ``np.dot(chunk1, chunk2.T)``, compares it element-wise against
    ``corr_thresh`` and wraps the boolean result in a ``sparse.COO`` array.
    The function is wrapped with ``dask.delayed``, so calling it builds a
    lazy task instead of computing immediately.

    NOTE(review): the dot product is only a correlation if the rows of both
    chunks are already standardized -- confirm against the caller.
    """
    products = np.dot(chunk1, chunk2.T)
    above_threshold = products > corr_thresh
    return sparse.COO.from_numpy(above_threshold)

def chunked_corr_sparse_dask(data, chunksize=5000, corr_thresh=0.9):

## pad_packed_demo.py
import torch
from torch import LongTensor
from torch.nn import Embedding, LSTM
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

## We want to run LSTM on a batch of 3 character sequences ['long_str', 'tiny', 'medium']
#
#     Step 1: Construct Vocabulary
#     Step 2: Load indexed data (list of instances, where each instance is list of character indices)

## covariance_to_correlation.py
import numpy as np

def correlation_from_covariance(covariance):
    """Convert a covariance matrix into the corresponding correlation matrix.

    Each entry is divided by the product of the two standard deviations
    (square roots of the diagonal). Entries whose covariance is exactly 0
    are forced to 0 in the result, which also replaces the NaN produced by
    0/0 for zero-variance variables.

    Args:
        covariance: Covariance matrix; expected to be a square 2-D array
            (assumption -- the shape is not validated here).

    Returns:
        The correlation matrix, same shape as ``covariance``.
    """
    v = np.sqrt(np.diag(covariance))
    outer_v = np.outer(v, v)
    # Zero-variance variables make this a division by zero. Silence the
    # resulting RuntimeWarnings; the zero-covariance entries are reset below.
    with np.errstate(divide="ignore", invalid="ignore"):
        correlation = covariance / outer_v
    correlation[covariance == 0] = 0
    return correlation

## lmt.py
import math
import numpy as np
from sklearn.linear_model import Ridge


class LinearModelTree:
    """Tree model configured by a node-size limit, a node fit function and a split threshold."""

    def __init__(self, min_node_size, node_model_fit_func, min_split_improvement=0):
        """Record the constructor arguments on the instance unchanged.

        Args:
            min_node_size: Stored as ``self.min_node_size``; presumably the
                smallest number of samples a node may hold (TODO confirm).
            node_model_fit_func: Stored as ``self.node_model_fit_func``;
                presumably the callable that fits a model for one node
                (TODO confirm).
            min_split_improvement: Stored as ``self.min_split_improvement``;
                defaults to 0.
        """
        (
            self.min_node_size,
            self.node_model_fit_func,
            self.min_split_improvement,
        ) = (min_node_size, node_model_fit_func, min_split_improvement)
	# Disclaimer: This code was not written by me. It is taken from https://github.com/imartinez/privateGPT/pull/91.
	# All credit goes to `vnk8071`, as I mentioned in the video.
	# Since this code was still in a pull request while I was creating the video, I made some modifications so that it works for me locally.

	import gradio as gr
	from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
	from langchain.chains import RetrievalQA
	from langchain.embeddings import LlamaCppEmbeddings
	from langchain.llms import GPT4All, LlamaCpp
	from langchain.vectorstores import Chroma
	def make_tooltipped_df(df, tooltips: dict):
	"""
	import pandas as pd
	from IPython.display import display, HTML

	# Sample DataFrame
	data = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
	df = pd.DataFrame(data)

	# Apply styles to the DataFrame
	from joblib import Parallel, delayed

	def expesive_calc(df):
	    """Placeholder for an expensive per-group computation.

	    Stores the ``Ellipsis`` literal under the ``'new_col'`` key of ``df``
	    (standing in for the real calculation) and returns the same object,
	    which is modified in place.
	    """
	    df['new_col'] = ...
	    return df

	def apply_parallel(gdf, func, n_jobs=8):
	    """Apply ``func`` to every group of ``gdf`` in parallel and concatenate the results.

	    Args:
	        gdf: Iterable yielding ``(name, group)`` pairs; presumably a pandas
	            GroupBy object (assumption based on usage -- confirm with callers).
	        func: Callable invoked once per group; its return values are handed
	            to ``pd.concat``.
	        n_jobs: Number of joblib workers. Defaults to 8, the value that was
	            previously hard-coded.

	    Returns:
	        The result of ``pd.concat`` over the per-group results.
	    """
	    # Imported locally because this snippet never imports pandas at module level.
	    import pandas as pd

	    # The group name is not needed; only the group itself is passed to ``func``.
	    tasks = (delayed(func)(group) for _, group in gdf)
	    ret_list = Parallel(n_jobs=n_jobs)(tasks)
	    return pd.concat(ret_list)
	# WARNING: This gist, in its current form, is a collection of command examples. Please exercise caution where mentioned.

	# Docker
	# Refresh the package index so the install below sees current package versions.
	sudo apt-get update
	# CAUTION: removes any Docker packages that are already installed.
	sudo apt-get remove docker docker-engine docker.io
	# Install Docker from the distribution's docker.io package.
	sudo apt install docker.io
	# Start the Docker service now...
	sudo systemctl start docker
	# ...and enable it so it also starts on boot.
	sudo systemctl enable docker
	# Print the installed version to confirm the installation worked.
	docker --version
	### Kaggle Days SF: Hackathon submission (8th place)

	# I used the latest version of H2O (3.24.0.1)
	# Latest stable always here: http://h2o-release.s3.amazonaws.com/h2o/latest_stable.html
	# H2O 3.24.0.1: http://h2o-release.s3.amazonaws.com/h2o/rel-yates/1/index.html
	# If you are a Python user, you can use the demo Python code available on the H2O AutoML User Guide
	# instead: http://docs.h2o.ai/h2o/latest-stable/h2o-docs/automl.html

	# Unfortunately it was a private competition, so the data is not publicly available!
	import dask
	import dask.array as da
	import dask.dataframe as dd
	import sparse

	@dask.delayed(pure=True)
	def corr_on_chunked(chunk1, chunk2, corr_thresh=0.9):
	    """Return a sparse boolean mask of entries whose dot product exceeds ``corr_thresh``.

	    Computes ``np.dot(chunk1, chunk2.T)``, compares it element-wise against
	    ``corr_thresh`` and wraps the boolean result in a ``sparse.COO`` array.
	    The function is wrapped with ``dask.delayed``, so calling it builds a
	    lazy task instead of computing immediately.

	    NOTE(review): the dot product is only a correlation if the rows of both
	    chunks are already standardized -- confirm against the caller.
	    """
	    return sparse.COO.from_numpy((np.dot(chunk1, chunk2.T) > corr_thresh))

	def chunked_corr_sparse_dask(data, chunksize=5000, corr_thresh=0.9):
	import torch
	from torch import LongTensor
	from torch.nn import Embedding, LSTM
	from torch.autograd import Variable
	from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

	## We want to run LSTM on a batch of 3 character sequences ['long_str', 'tiny', 'medium']
	#
	# Step 1: Construct Vocabulary
	# Step 2: Load indexed data (list of instances, where each instance is list of character indices)
	import numpy as np

	def correlation_from_covariance(covariance):
	    """Convert a covariance matrix into the corresponding correlation matrix.

	    Each entry is divided by the product of the two standard deviations
	    (square roots of the diagonal). Entries whose covariance is exactly 0
	    are forced to 0 in the result, which also replaces the NaN produced by
	    0/0 for zero-variance variables.

	    Args:
	        covariance: Covariance matrix; expected to be a square 2-D array
	            (assumption -- the shape is not validated here).

	    Returns:
	        The correlation matrix, same shape as ``covariance``.
	    """
	    v = np.sqrt(np.diag(covariance))
	    outer_v = np.outer(v, v)
	    # Zero-variance variables make this a division by zero. Silence the
	    # resulting RuntimeWarnings; the zero-covariance entries are reset below.
	    with np.errstate(divide="ignore", invalid="ignore"):
	        correlation = covariance / outer_v
	    correlation[covariance == 0] = 0
	    return correlation
	import math
	import numpy as np
	from sklearn.linear_model import Ridge


	class LinearModelTree:
	def __init__(self, min_node_size, node_model_fit_func, min_split_improvement=0):
	self.min_node_size = min_node_size
	self.node_model_fit_func = node_model_fit_func
	self.min_split_improvement = min_split_improvement