Pier lppier

## gist:3a11c5c54b18d2b33126da7eb3ee5b66

RVC-Mangio
https://github.com/Mangio621/Mangio-RVC-Fork/releases

Google Doc for MacOS Install Instructions
https://docs.google.com/document/d/1KKKE7hoyGXMw-Lg0JWx16R8xz3OfxADjwEYJTqzDO1k/edit#heading=h.8vqd8m4fh76q

Voice Models
https://voice-models.com/

## Using PCA to represent word vectors in 2D.py
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
# Load the sentence-embedding model used to vectorize the corpus.
embedder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
# Alternative model, kept commented out for quick switching:
#embedder = SentenceTransformer('all-MiniLM-L6-v2')
# NOTE(review): tfidf_sum is defined outside this excerpt; all that is needed
# here is an `.index` offering `to_list()` — presumably a pandas object
# indexed by term. Confirm against the code that builds it.
corpus = tfidf_sum.index.to_list()
# Encode every corpus entry with the model loaded above.
corpus_embeddings = embedder.encode(corpus)

# Perform k-means clustering
num_clusters = 8
# random_state is pinned so repeated runs use the same seed.
clustering_model = KMeans(n_clusters=num_clusters, random_state=42, init='k-means++')

## add_sagemaker_kernel.sh
# Create a conda environment that provides Python 3.9.
conda create --yes --name my_custom_python_39 python=3.9
# Work from the SageMaker folder so the virtualenv directory is created there.
cd ~/SageMaker
# Use the conda environment only to bootstrap a virtualenv.
source activate my_custom_python_39
pip install virtualenv
virtualenv my_custom_python_39_venv
# Leave conda and switch to the freshly created virtualenv.
conda deactivate
source my_custom_python_39_venv/bin/activate
# Install the project's requirements (replace <your_project_folder> with the
# real path) and ipykernel into the virtualenv.
pip install --requirement <your_project_folder>/requirements.txt
pip install ipykernel
# Register the virtualenv as a Jupyter kernel for the current user.
python -m ipykernel install --user --name=my_custom_python_39

## gist:1548d7b93cc8d32955ea4db078af91b6
# Create a conda environment that provides Python 3.9.
conda create -n my_custom_python_39 python=3.9 -y
# Work from the SageMaker folder so the virtualenv directory is created there.
cd ~/SageMaker
# Use the conda environment only to bootstrap a virtualenv.
source activate my_custom_python_39
pip install virtualenv
virtualenv my_custom_python_39_venv
# Leave conda and switch to the freshly created virtualenv.
conda deactivate
source my_custom_python_39_venv/bin/activate
# `pip install` has no -y/--yes option (that flag belongs to `pip uninstall`),
# so passing it aborts with "no such option: -y". Install without it.
pip install ipykernel
# Register the virtualenv as a Jupyter kernel for the current user.
python -m ipykernel install --user --name=my_custom_python_39

## regex_cheatsheet.py
# Remove URLs: drop every run of non-whitespace characters starting at "http"
url_pattern = re.compile(r'http\S+')
text_clean = [url_pattern.sub('', doc) for doc in text]

# Remove new lines \n: trim both ends, then turn inner newlines into spaces
text_clean = [doc.strip().replace('\n', ' ') for doc in text_clean]

# Remove emails: drop anything shaped like <word/dot/dash chars>@<word/dot/dash chars>
email_pattern = re.compile(r'[\w\.-]+@[\w\.-]+')
text_clean = [email_pattern.sub('', doc) for doc in text_clean]

# Remove single quotes

## Comprehend_SentimentCustomClassifier.py
from pathlib import Path
from sklearn.model_selection import train_test_split

# IMDB Dataset can be found here
# wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
# tar -xf aclImdb_v1.tar.gz


def read_imdb_split(split_dir):
    # Normalize the argument to a pathlib.Path so a plain string path is
    # accepted as well.
    # NOTE(review): the rest of this function is not part of this excerpt;
    # presumably split_dir is one of the extracted aclImdb split folders
    # mentioned above — confirm against the full gist.
    split_dir = Path(split_dir)

## HF_CustomSentimentClassifier.py
from pathlib import Path
from sklearn.model_selection import train_test_split
from transformers import DistilBertTokenizerFast
import torch
from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# IMDB Dataset can be found here
# wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz

## gist:a117e2f1702b36c65350987ccf73659b
" enable syntax highlighting
syntax enable

" show line numbers
set number

" make each <Tab> character count for 4 columns (ts is short for tabstop)
set ts=4

" indent when moving to the next line while writing code

## countxlsx.py
import os

# Running total; nothing in this excerpt updates it.
total_qns = 0
# Start the walk from the current working directory.
rootdir = '.'

# Visit rootdir and every directory beneath it; each step yields the
# directory path, its sub-directory names and its file names.
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        # Join the directory and file name into one path.
        path = os.path.join(subdir, file)
        # Printed for every file found, not only the spreadsheets.
        print(path)
        # Only .xlsx files enter this branch (its body lies outside this excerpt).
        if path.endswith('.xlsx'):

## detect_percentage_english.py
import string
import urllib.request
from nltk.corpus import words

punctuation = set(string.punctuation)

def remove_punc(str):
    """Return ``str`` with every character found in ``punctuation`` removed.

    The parameter name shadows the builtin ``str``; it is kept unchanged so
    existing keyword callers keep working.
    """
    kept_chars = [ch for ch in str if ch not in punctuation]
    return ''.join(kept_chars)

total_count = 0

	RVC-Mangio
	https://github.com/Mangio621/Mangio-RVC-Fork/releases

	Google Doc for MacOS Install Instructions
	https://docs.google.com/document/d/1KKKE7hoyGXMw-Lg0JWx16R8xz3OfxADjwEYJTqzDO1k/edit#heading=h.8vqd8m4fh76q

	Voice Models
	https://voice-models.com/
	from sentence_transformers import SentenceTransformer
	from sklearn.cluster import KMeans
	embedder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
	#embedder = SentenceTransformer('all-MiniLM-L6-v2')
	corpus = tfidf_sum.index.to_list()
	corpus_embeddings = embedder.encode(corpus)

	# Perform k-means clustering
	num_clusters = 8
	clustering_model = KMeans(n_clusters=num_clusters, random_state=42, init='k-means++')
	conda create -n my_custom_python_39 python=3.9 -y
	cd ~/SageMaker
	source activate my_custom_python_39
	pip install virtualenv
	virtualenv my_custom_python_39_venv
	conda deactivate
	source my_custom_python_39_venv/bin/activate
	pip install -r <your_project_folder>/requirements.txt
	pip install ipykernel
	python -m ipykernel install --user --name=my_custom_python_39
	# Remove URLs
	text_clean = [re.sub(r'http\S+', '', t) for t in text]

	# Remove new lines \n
	text_clean= [t.strip().replace('\n', ' ') for t in text_clean]

	# Remove emails
	text_clean = [re.sub(r'[\w\.-]+@[\w\.-]+', '', t) for t in text_clean]

	# Remove single quotes
	from pathlib import Path
	from sklearn.model_selection import train_test_split

	# IMDB Dataset can be found here
	# wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
	# tar -xf aclImdb_v1.tar.gz


	def read_imdb_split(split_dir):
	split_dir = Path(split_dir)
	from pathlib import Path
	from sklearn.model_selection import train_test_split
	from transformers import DistilBertTokenizerFast
	import torch
	from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
	import torch.nn.functional as F
	from sklearn.metrics import accuracy_score, precision_recall_fscore_support

	# IMDB Dataset can be found here
	# wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
	" enable syntax highlighting
	syntax enable

	" show line numbers
	set number

	" set tabs to have 4 spaces
	set ts=4

	" indent when moving to the next line while writing code
	import os

	total_qns = 0
	rootdir = '.'

	for subdir, dirs, files in os.walk(rootdir):
	for file in files:
	path = os.path.join(subdir, file)
	print(path)
	if path.endswith('.xlsx'):
	import string
	import urllib.request
	from nltk.corpus import words

	punctuation = set(string.punctuation)

	def remove_punc(str):
	    """Return ``str`` with every character found in ``punctuation`` removed.

	    ``punctuation`` is the module-level set built from
	    ``string.punctuation``. The parameter name shadows the builtin
	    ``str``; it is kept unchanged so keyword callers keep working.
	    """
	    # The body must be indented under the def; the flattened copy had lost
	    # that indentation, which made the definition a syntax error.
	    return ''.join(c for c in str if c not in punctuation)

	total_count = 0