Goals: Add links to reasonable, well-written explanations of how things work. No hype and, if possible, no vendor content. Practical first-hand accounts of models in production are eagerly sought.
![Screenshot 2023-12-18 at 10 40 27 PM](https://private-user-images.githubusercontent.com/3837836/291468646-4c30ad72-76ee-4939-a5fb-16b570d38cf2.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3MjE3NDE2NDgsIm5iZiI6MTcyMTc0MTM0OCwicGF0aCI6Ii8zODM3ODM2LzI5MTQ2ODY0Ni00YzMwYWQ3Mi03NmVlLTQ5MzktYTVmYi0xNmI1NzBkMzhjZjIucG5nP1gtQW16LUFsZ29yaXRobT1BV1M0LUhNQUMtU0hBMjU2JlgtQW16LUNyZWRlbnRpYWw9QUtJQVZDT0RZTFNBNTNQUUs0WkElMkYyMDI0MDcyMyUyRnVzLWVhc3QtMSUyRnMzJTJGYXdzNF9yZXF1ZXN0JlgtQW16LURhdGU9MjAyNDA3MjNUMTMyOTA4WiZYLUFtei1FeHBpcmVzPTMwMCZYLUFtei1TaWduYXR1cmU9MzcwY2JiYTQ3NmZiNDljZTNkM2VmODQ4YTk1MzZiMDI2MWFkOGIwN2RmZTdiODY4YTlhOTczYzM5ZDdiZTRlMyZYLUFtei1TaWduZWRIZWFkZXJzPWhvc3QmYWN0b3JfaWQ9MCZrZXlfaWQ9MCZyZXBvX2lkPTAifQ.Mnd0fOFMDw8U43pFGx9Rimw6XOOUoobvOmIvIqwi674)
# Requires asconnect (pip install asconnect) and openai (pip install openai)
import asconnect | |
import os | |
import openai | |
# App Store Connect API credentials. How to create an API key:
# https://developer.apple.com/documentation/appstoreconnectapi/creating_api_keys_for_app_store_connect_api
# NOTE(review): the "xxxxx" values look like placeholders to be replaced with
# real credentials; how they are consumed is not visible here — confirm.
APPCONN_APIKEY_ID = "xxxxx" | |
APPCONN_ISSUER_ID = "xxxxxxx" | |
APPCONN_KEY_FILE = "xxxxx.p8" |
# ------------------------------------------------------------------ | |
# EDIT: I eventually found a faster way to run SD on macOS, via MPSGraph (~0.8s / step on M1 Pro): | |
# https://github.com/madebyollin/maple-diffusion | |
# The original CoreML-related code & discussion is preserved below :) | |
# ------------------------------------------------------------------ | |
# you too can run stable diffusion on the apple silicon GPU (no ANE sadly) | |
# | |
# quick test portraits (each took 50 steps x 2s / step ~= 100s on my M1 Pro): | |
# * https://i.imgur.com/5ywISvm.png |
import requests | |
import os, sys | |
import concurrent.futures | |
from itertools import repeat | |
class XimaScraper: | |
def __init__(self, album_no, page_num): | |
headers = { | |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0' | |
} |
/// Cosine similarity of two vectors: their dot product divided by the
/// product of their magnitudes.
///
/// - Parameters:
///   - A: First vector.
///   - B: Second vector.
/// - Returns: `dot(A, B) / (|A| * |B|)`.
private func cosineSim(A: [Double], B: [Double]) -> Double {
    let numerator = dot(A: A, B: B)
    let denominator = magnitude(A: A) * magnitude(A: B)
    return numerator / denominator
}
/** Dot Product **/ | |
private func dot(A: [Double], B: [Double]) -> Double { | |
var x: Double = 0 | |
for i in 0...A.count-1 { | |
x += A[i] * B[i] |
本文介绍如何提取声学特征用于Merlin训练。在语音合成中,属于声码器(vocoder)的内容。
Merlin可以使用两种vocoder,STRAIGHT
或WORLD
。WORLD
的目标是提取60-dim MGC, variable-dim BAP (BAP dim: 1 for 16Khz, 5 for 48Khz), 1-dim LF0;STRAIGHT
的目标是提取60-dim MGC, 25-dim BAP, 1-dim LF0。
新版本的WORLD_v2
还在开发中,目标是提取60-dim MGC, 5-dim BAP, 1-dim LF0(MGC和BAP的维度支持微调)。
由于STRAIGHT
的使用有严格的许可证限制,本文主要介绍WORLD
。
%!TEX program = xelatex | |
% Font Size: | |
% 10pt, 11pt, 12pt | |
% Paper Size: | |
% a4paper, letterpaper, a5paper, legalpaper, executivepaper, landscape
% Font Family: | |
% roman, sans | |
\documentclass[12pt, a4paper, roman]{moderncv} | |
% Style: |
""" | |
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy) | |
BSD License | |
""" | |
import numpy as np | |
# data I/O
# Read the whole training corpus. The context manager closes the file handle
# as soon as the text is in memory instead of leaving it open.
with open('input.txt', 'r') as f:
    data = f.read()  # should be simple plain text file
# Sort the unique characters so the vocabulary order is reproducible across
# runs; iteration order of a set of strings can change between interpreter
# runs because of hash randomization.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)