Skip to content

Instantly share code, notes, and snippets.

@lpabon
Created June 29, 2023 19:38
Show Gist options
  • Save lpabon/e0505b6bb709bc35c60fb7b3af535a09 to your computer and use it in GitHub Desktop.
Save lpabon/e0505b6bb709bc35c60fb7b3af535a09 to your computer and use it in GitHub Desktop.
diff --git a/.gitignore b/.gitignore
index cd5cc6c..13466b5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,7 @@ venv
__pycache__
*.pyc
*.txt
+*.wav
+*.mp3
!requirements.txt
+*.diff
diff --git a/ai_presenter/ai_presenter.py b/ai_presenter/ai_presenter.py
index 18c6df5..d20e135 100644
--- a/ai_presenter/ai_presenter.py
+++ b/ai_presenter/ai_presenter.py
@@ -19,7 +19,7 @@ class AIPresenter:
textai = self.generator.get_text()
for key, scene in self.database.scenes.items():
logging.info(f"Working on scene: {scene.name} in " +
- f"{scene.location}")
+ f"{scene.location}")
output = textai.generate(scene)
file.write(output + '\n')
diff --git a/ai_presenter/config/voice.py b/ai_presenter/config/voice.py
index c4ea3bd..646243b 100644
--- a/ai_presenter/config/voice.py
+++ b/ai_presenter/config/voice.py
@@ -2,8 +2,8 @@
# VoiceConfig can have passed in name, gender, age,
# accent, and accent_strength and have these initialized
class VoiceConfig:
- def __init__(self, name, gender, age, accent,
- accent_strength, description):
+ def __init__(self, name='', gender='', age='', accent='',
+ accent_strength=0, description=''):
self.name = name
self.gender = gender
self.age = age
diff --git a/ai_presenter/database.py b/ai_presenter/database.py
index 02e05d4..7d8110e 100644
--- a/ai_presenter/database.py
+++ b/ai_presenter/database.py
@@ -1,3 +1,4 @@
+import logging
from ai_presenter.config.config import Config
@@ -28,6 +29,7 @@ class Actor:
self.height = data['height']
self.gender = data['gender']
self.accent = data['accent']
+ logging.info(f'actor: name:{self.name} gender:{self.gender}')
class Scene:
diff --git a/ai_presenter/text_ai/chatgpt.py b/ai_presenter/text_ai/chatgpt.py
index 1278faf..a69faad 100644
--- a/ai_presenter/text_ai/chatgpt.py
+++ b/ai_presenter/text_ai/chatgpt.py
@@ -17,7 +17,7 @@ class TextChatGPT(TextAi):
"content": "You will be provided with a set of characters, " +
"their description, and a scene in JSON format. " +
"Create dialogue using the plot and characters " +
- "provided and return it in JSON format."
+ "provided and return it in JSON format. Add a narrator with key 'narrator' describing the characters, scene, and emotions"
},
{
"role": "user",
@@ -33,10 +33,7 @@ class TextChatGPT(TextAi):
},
{
"role": "assistant",
- "content": '{"dialogue":[{"speaker":"Max Doe","message"' +
- ':"Joana, I must say, your taste in bagels is utterly ' +
- 'appalling!"},{"speaker":"Joana Smith","message":' +
- '"Max, you are right."}]};'
+ "content": '{"dialogue":[{"speaker":"narrator","message":"Max stood close to Joana."},{"speaker":"Max Doe","message":"Joana, I must say, your taste in bagels is utterly appalling!"},{"speaker":"Joana Smith","message":"Max, you are right."},{"speaker":"narrator","message":"Finally Max was happy."}]}'
}
]
@@ -53,21 +50,49 @@ class TextChatGPT(TextAi):
self.messages.append(
{"role": "user", "content": json.dumps(self.user_message)}
)
- completion = openai.ChatCompletion.create(
- model="gpt-3.5-turbo",
- messages=self.messages,
- )
- # clear for next time
- # self.messages = []
+
self.user_message = {}
+ messages = self.messages
+ full_resp = ""
- resp = completion.choices[0].message.content
- self.messages.append(
- {"role": "assistant", "content": resp}
- )
- logging.info("Recieved " + resp)
- resp = json_trim(resp)
- try:
- return json.dumps(json.loads(resp))
- except Exception:
- return "{}"
+ count = 5
+ while count > 0:
+ count -= 1
+ completion = openai.ChatCompletion.create(
+ model="gpt-3.5-turbo",
+ messages=messages,
+ )
+
+ resp = completion.choices[0].message.content
+ finish_reason = completion.choices[0].finish_reason
+ full_resp += resp
+
+ logging.info(">> Recieved: " + resp)
+ logging.info(">> Have: " + full_resp)
+ logging.info(">> finish_reason: " + completion.choices[0].finish_reason)
+ if finish_reason == 'stop':
+ logging.info('chatgpt: got all info')
+ self.messages.append(
+ {"role": "assistant", "content": full_resp}
+ )
+ try:
+ resp = json_trim(resp)
+ return json.dumps(json.loads(resp))
+ except Exception:
+ return "{}"
+ elif finish_reason == 'length':
+ logging.info(f'chatgpt: need more info, retrying, usage:{completion.choices[0].usage} count:{count}')
+ messages += [
+ {"role": "assistant", "content": ""},
+ ]
+ else:
+ raise Exception("finish reason is " + finish_reason)
+
+ ## DEBUG
+ try:
+ json.dumps(resp)
+ except Exception:
+ logging.critical("******* >> resp is not json but finish_reason: " + completion.choices[0].finish_reason)
+
+
+ raise Exception('Tried too many times to talk to ChatGPT')
diff --git a/ai_presenter/voice_ai/base.py b/ai_presenter/voice_ai/base.py
index f03562f..48a76e2 100644
--- a/ai_presenter/voice_ai/base.py
+++ b/ai_presenter/voice_ai/base.py
@@ -1,6 +1,6 @@
from ai_presenter.database import Database
from ai_presenter.config.voice import VoiceConfig
-from elevenlabs import Iterator, Voice
+from elevenlabs import Iterator, Voice, Voices
import json
import logging
@@ -39,26 +39,27 @@ class VoiceAI:
def create_character_db(self, line: str):
json_string = line.strip()
- data = (json.loads(json_string))
+ data = json.loads(json_string)
for message in data['dialogue']:
name = message['speaker']
+
if name not in self.characters:
+ logging.info(f"creating character {name}")
try:
character_config = VoiceConfig(name,
- self.actors[name].gender,
- self.actors[name].age,
- self.actors[name].accent,
- 1.99,
- self.actors[name].
- description)
+ self.actors[name].gender,
+ self.actors[name].age,
+ self.actors[name].accent,
+ 1.99,
+ self.actors[name].description)
self.characters[name] = self.new_actor(character_config)
except Exception:
character_config = VoiceConfig(name,
- 'male',
- 'middle_aged',
- "british",
- 1.99,
- f"This is the {name}")
+ 'male',
+ 'middle_aged',
+ "british",
+ 1.99,
+ f"This is the {name}")
self.characters[name] = self.new_actor(character_config)
return data
diff --git a/ai_presenter/voice_ai/elevenlabs.py b/ai_presenter/voice_ai/elevenlabs.py
index 52aa8df..fb080ca 100644
--- a/ai_presenter/voice_ai/elevenlabs.py
+++ b/ai_presenter/voice_ai/elevenlabs.py
@@ -7,6 +7,21 @@ import logging
import os
+class VoiceAIDefaultActorElevenLabs(VoiceAIActor):
+ def __init__(self, config: VoiceConfig, voice: Voice):
+ super().__init__(config)
+ self.voice = voice
+
+ # says() takes the message and generates audio from that message.
+ # Note: for the real VoiceAIActor class, the ElevenLabs generate
+ # methods return raw data (the audio), which can be manipulated before
+ # saving to a file (i.e. concatenation).
+ def says(self, message) -> (bytes | Iterator[bytes]):
+ logging.info(f'{self.name} says {message}')
+ audio = generate(text=message, model="eleven_monolingual_v1",
+ voice=self.voice)
+ return audio
+
class VoiceAIActorElevenLabs(VoiceAIActor):
def __init__(self, config: VoiceConfig):
super().__init__(config)
@@ -16,7 +31,7 @@ class VoiceAIActorElevenLabs(VoiceAIActor):
f'I am {self.name}. I am a {self.age} year old ' + \
f'{self.gender} with a {self.accent} accent.'
- logging.info(f"designing a voice for {self.name}")
+ logging.info(f"designing a voice for {self.name} gender:{self.gender}")
self.voice_design = VoiceDesign(name=self.name,
text=self.sample_text,
gender=self.gender,
@@ -40,7 +55,12 @@ class ElevenLabs(VoiceAI):
def __init__(self, db: Database):
super().__init__(db)
- def new_actor(self, config):
+ def new_actor(self, config) -> VoiceAIActor:
+ if config.name == 'narrator':
+ return VoiceAIDefaultActorElevenLabs(
+ config,
+ Voice(voice_id='idofvoice', name='narrator'),
+ )
return VoiceAIActorElevenLabs(config)
# make narrator actor
@@ -68,7 +88,6 @@ class ElevenLabs(VoiceAI):
for message in data['dialogue']:
name = message['speaker']
text = message['message']
- logging.info('ElevenLabs: Stitching together audio')
audio += self.characters[name].says(text)
logging.info(f"ElevenLabs: Audio can be found in {output_file}")
save(audio, output_file)
diff --git a/ai_presenter/voice_ai/fake.py b/ai_presenter/voice_ai/fake.py
index abee3bc..89a6c2e 100644
--- a/ai_presenter/voice_ai/fake.py
+++ b/ai_presenter/voice_ai/fake.py
@@ -10,7 +10,7 @@ class VoiceAIActorFake(VoiceAIActor):
def __init__(self, config: VoiceConfig):
super().__init__(config)
- def says(self, message, emotion) -> (bytes | Iterator[bytes]):
+ def says(self, message) -> (bytes | Iterator[bytes]):
# .says takes the message and generates audio from that message
# this audio gets saved to a file
# personally don't think says needs a file passed to it bc
@@ -18,7 +18,7 @@ class VoiceAIActorFake(VoiceAIActor):
# methods return raw data called audio which can be manipulated before
# saving to a file(ie. concatenation)
logging.info(f'VoiceAIActorFake: {self.name} ' +
- f'says {message} in a {emotion} way')
+ f'says {message}')
audio = f'name: {self.name}\ngender: {self.gender}\n' + \
f'age: {self.age}\naccent: {self.accent}\n' + \
@@ -27,11 +27,10 @@ class VoiceAIActorFake(VoiceAIActor):
f'message: {message}\n\n'
return audio
- def __get_voice(self, emotion) -> Voice:
+ def __get_voice(self) -> Voice:
logging.info(f'I am {self.name}. I am a {self.age} year old ' +
f'{self.gender} with a {self.accent} accent. I am ' +
- f'currently speaking in a {emotion} tone because I' +
- f' am {emotion}')
+ f'currently speaking')
class VoiceAIFake(VoiceAI):
@@ -62,9 +61,8 @@ class VoiceAIFake(VoiceAI):
for message in data['dialogue']:
name = message['speaker']
text = message['message']
- emotion = message['emotion']
logging.info('VoiceAIFake: Stitching together audio')
- audio += self.characters[name].says(text, emotion)
+ audio += self.characters[name].says(text)
logging.info('VoiceAIFake: Generating audio file')
with open(output_file, 'w') as out:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment