Pier lppier

## gist:3a11c5c54b18d2b33126da7eb3ee5b66

RVC-Mangio
https://github.com/Mangio621/Mangio-RVC-Fork/releases

Google Doc with macOS Install Instructions
https://docs.google.com/document/d/1KKKE7hoyGXMw-Lg0JWx16R8xz3OfxADjwEYJTqzDO1k/edit#heading=h.8vqd8m4fh76q

Voice Models
https://voice-models.com/

## nextpow2
def nextpow2(N):
    """Return the smallest power of two that is not less than ``N``.

    The search starts at 1, so any ``N`` of 1 or below yields 1.
    """
    power = 1
    while power < N:
        # Doubling via a left shift keeps the candidate an exact power of two.
        power <<= 1
    return power

## Using PCA to represent word vectors in 2D.py
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
# Load the sentence-embedding model used to vectorise the corpus.
embedder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
# Alternative model, kept commented out for quick switching:
#embedder = SentenceTransformer('all-MiniLM-L6-v2')
# The corpus is the index of `tfidf_sum`, which is defined outside this snippet
# (presumably a pandas object whose index holds the terms -- TODO confirm).
corpus = tfidf_sum.index.to_list()
# Encode every corpus entry with the embedding model.
corpus_embeddings = embedder.encode(corpus)

# Configure k-means clustering with a fixed number of clusters and a fixed seed.
# NOTE(review): the model is only constructed here; no fit/predict call is
# visible in this excerpt.
num_clusters = 8
clustering_model = KMeans(n_clusters=num_clusters, random_state=42, init='k-means++')

## redis_python_dict.py
import redis
import json

# Retrieve Redis stored values, if any, for faster processing.
# Start from an empty dict so the name is defined even when nothing is cached.
anc_dict_320 = {}
# Connect to the Redis server on localhost, port 6379, database 0.
r = redis.Redis(host='localhost', port=6379, db=0)
# Look up the JSON payload stored under the key "anc_dict_320".
json_str_320 = r.get("anc_dict_320")
if json_str_320 is not None:
    # A value was returned: decode the JSON into a Python object and print it.
    anc_dict_320 = json.loads(json_str_320)
    print(anc_dict_320)

## add_sagemaker_kernel.sh
# Create a conda environment with Python 3.9 (-y answers the confirmation prompt).
conda create -n  my_custom_python_39 python=3.9 -y
# Work from the SageMaker directory in the home folder.
cd ~/SageMaker
# Activate the conda environment so its Python is the one in use.
source activate my_custom_python_39
# Use that Python to install virtualenv and create a virtualenv in the current directory.
pip install virtualenv
virtualenv my_custom_python_39_venv
# Leave the conda environment and switch to the virtualenv just created.
conda deactivate
source my_custom_python_39_venv/bin/activate
# Install the project dependencies; replace <your_project_folder> with the real path.
pip install -r <your_project_folder>/requirements.txt
# Install ipykernel and register the virtualenv as a Jupyter kernel for the current user.
pip install ipykernel
python -m ipykernel install --user --name=my_custom_python_39

## gist:1548d7b93cc8d32955ea4db078af91b6
# Create a conda environment with Python 3.9 (-y answers the confirmation prompt).
conda create -n  my_custom_python_39 python=3.9 -y
# Work from the SageMaker directory in the home folder.
cd ~/SageMaker
# Activate the conda environment so its Python is the one in use.
source activate my_custom_python_39
# Use that Python to install virtualenv and create a virtualenv in the current directory.
pip install virtualenv
virtualenv my_custom_python_39_venv
# Leave the conda environment and switch to the virtualenv just created.
conda deactivate
source my_custom_python_39_venv/bin/activate
# Install ipykernel. `pip install` has no -y option (it does not prompt), so none is passed.
pip install ipykernel
# Register the virtualenv as a Jupyter kernel for the current user.
python -m ipykernel install --user --name=my_custom_python_39

## regex_cheatsheet.py
# `re` is required by the substitutions below; imported here so the snippet
# does not depend on an import made elsewhere.
import re

# NOTE: `text` must already be defined as an iterable of strings; it is not
# created in this snippet.

# Remove URLs: drop every run of non-whitespace characters starting with "http".
text_clean = [re.sub(r'http\S+', '', t) for t in text]

# Remove new lines \n: trim surrounding whitespace, then replace the remaining
# newlines with single spaces.
text_clean = [t.strip().replace('\n', ' ') for t in text_clean]

# Remove emails: drop anything shaped like <word/dot/hyphen chars>@<word/dot/hyphen chars>.
text_clean = [re.sub(r'[\w\.-]+@[\w\.-]+', '', t) for t in text_clean]

# Remove single quotes

## Comprehend_SentimentCustomClassifier.py
from pathlib import Path
from sklearn.model_selection import train_test_split

# IMDB Dataset can be found here
# wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
# tar -xf aclImdb_v1.tar.gz


def read_imdb_split(split_dir):
    split_dir = Path(split_dir)

## HF_CustomSentimentClassifier.py
from pathlib import Path
from sklearn.model_selection import train_test_split
from transformers import DistilBertTokenizerFast
import torch
from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# IMDB Dataset can be found here
# wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz

## gist:a117e2f1702b36c65350987ccf73659b
" enable syntax highlighting
syntax enable

" show line numbers
set number

" display each tab character as 4 columns wide (ts is short for tabstop)
set ts=4

" indent when moving to the next line while writing code

	RVC-Mangio
	https://github.com/Mangio621/Mangio-RVC-Fork/releases

	Google Doc with macOS Install Instructions
	https://docs.google.com/document/d/1KKKE7hoyGXMw-Lg0JWx16R8xz3OfxADjwEYJTqzDO1k/edit#heading=h.8vqd8m4fh76q

	Voice Models
	https://voice-models.com/
	def nextpow2(N):
	    """Return the smallest power of two that is greater than or equal to N.

	    The search starts at 1, so any N of 1 or below yields 1.
	    """
	    n = 1
	    while n < N:
	        n *= 2
	    return n
	from sentence_transformers import SentenceTransformer
	from sklearn.cluster import KMeans
	# Load the sentence-embedding model used to vectorise the corpus.
	embedder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
	# Alternative model, kept commented out for quick switching:
	#embedder = SentenceTransformer('all-MiniLM-L6-v2')
	# The corpus is the index of `tfidf_sum`, which is defined outside this snippet
	# (presumably a pandas object whose index holds the terms -- TODO confirm).
	corpus = tfidf_sum.index.to_list()
	# Encode every corpus entry with the embedding model.
	corpus_embeddings = embedder.encode(corpus)

	# Configure k-means clustering with a fixed number of clusters and a fixed seed.
	# NOTE(review): the model is only constructed here; no fit/predict call is
	# visible in this excerpt.
	num_clusters = 8
	clustering_model = KMeans(n_clusters=num_clusters, random_state=42, init='k-means++')
	import redis
	import json

	# Retrieve Redis stored values, if any, for faster processing.
	# Start from an empty dict so the name is defined even when nothing is cached.
	anc_dict_320 = {}
	# Connect to the Redis server on localhost, port 6379, database 0.
	r = redis.Redis(host='localhost', port=6379, db=0)
	# Look up the JSON payload stored under the key "anc_dict_320".
	json_str_320 = r.get("anc_dict_320")
	if json_str_320 is not None:
	    # A value was returned: decode the JSON into a Python object and print it.
	    anc_dict_320 = json.loads(json_str_320)
	    print(anc_dict_320)
	# Create a conda environment with Python 3.9 (-y answers the confirmation prompt).
	conda create -n my_custom_python_39 python=3.9 -y
	# Work from the SageMaker directory in the home folder.
	cd ~/SageMaker
	# Activate the conda environment so its Python is the one in use.
	source activate my_custom_python_39
	# Use that Python to install virtualenv and create a virtualenv in the current directory.
	pip install virtualenv
	virtualenv my_custom_python_39_venv
	# Leave the conda environment and switch to the virtualenv just created.
	conda deactivate
	source my_custom_python_39_venv/bin/activate
	# Install the project dependencies; replace <your_project_folder> with the real path.
	pip install -r <your_project_folder>/requirements.txt
	# Install ipykernel and register the virtualenv as a Jupyter kernel for the current user.
	pip install ipykernel
	python -m ipykernel install --user --name=my_custom_python_39
	# `re` is required by the substitutions below; imported here so the snippet
	# does not depend on an import made elsewhere.
	import re

	# NOTE: `text` must already be defined as an iterable of strings; it is not
	# created in this snippet.

	# Remove URLs: drop every run of non-whitespace characters starting with "http".
	text_clean = [re.sub(r'http\S+', '', t) for t in text]

	# Remove new lines \n: trim surrounding whitespace, then replace the remaining
	# newlines with single spaces.
	text_clean = [t.strip().replace('\n', ' ') for t in text_clean]

	# Remove emails: drop anything shaped like <word/dot/hyphen chars>@<word/dot/hyphen chars>.
	text_clean = [re.sub(r'[\w\.-]+@[\w\.-]+', '', t) for t in text_clean]

	# Remove single quotes
	from pathlib import Path
	from sklearn.model_selection import train_test_split

	# IMDB Dataset can be found here
	# wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
	# tar -xf aclImdb_v1.tar.gz


	def read_imdb_split(split_dir):
	split_dir = Path(split_dir)
	from pathlib import Path
	from sklearn.model_selection import train_test_split
	from transformers import DistilBertTokenizerFast
	import torch
	from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
	import torch.nn.functional as F
	from sklearn.metrics import accuracy_score, precision_recall_fscore_support

	# IMDB Dataset can be found here
	# wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
	" enable syntax highlighting
	syntax enable

	" show line numbers
	set number

	" display each tab character as 4 columns wide (ts is short for tabstop)
	set ts=4

	" indent when moving to the next line while writing code