lpabon/narrator.diff

## narrator.diff
diff --git a/.gitignore b/.gitignore
index cd5cc6c..13466b5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,7 @@ venv
 __pycache__
 *.pyc
 *.txt
+*.wav
+*.mp3
 !requirements.txt
+*.diff
diff --git a/ai_presenter/ai_presenter.py b/ai_presenter/ai_presenter.py
index 18c6df5..d20e135 100644
--- a/ai_presenter/ai_presenter.py
+++ b/ai_presenter/ai_presenter.py
@@ -19,7 +19,7 @@ class AIPresenter:
             textai = self.generator.get_text()
             for key, scene in self.database.scenes.items():
                 logging.info(f"Working on scene: {scene.name} in " +
-                             f"{scene.location}")
+                            f"{scene.location}")

                 output = textai.generate(scene)
                 file.write(output + '\n')
diff --git a/ai_presenter/config/voice.py b/ai_presenter/config/voice.py
index c4ea3bd..646243b 100644
--- a/ai_presenter/config/voice.py
+++ b/ai_presenter/config/voice.py
@@ -2,8 +2,8 @@
 # VoiceConfig can have passed in name, gender, age,
 # accent, and accent_strength and have these initialized
 class VoiceConfig:
-    def __init__(self, name, gender, age, accent,
-                 accent_strength, description):
+    def __init__(self, name='', gender='', age='', accent='',
+                 accent_strength=0, description=''):
         self.name = name
         self.gender = gender
         self.age = age
diff --git a/ai_presenter/database.py b/ai_presenter/database.py
index 02e05d4..7d8110e 100644
--- a/ai_presenter/database.py
+++ b/ai_presenter/database.py
@@ -1,3 +1,4 @@
+import logging
 from ai_presenter.config.config import Config


@@ -28,6 +29,7 @@ class Actor:
         self.height = data['height']
         self.gender = data['gender']
         self.accent = data['accent']
+        logging.info(f'actor: name:{self.name} gender:{self.gender}')


 class Scene:
diff --git a/ai_presenter/text_ai/chatgpt.py b/ai_presenter/text_ai/chatgpt.py
index 1278faf..a69faad 100644
--- a/ai_presenter/text_ai/chatgpt.py
+++ b/ai_presenter/text_ai/chatgpt.py
@@ -17,7 +17,7 @@ class TextChatGPT(TextAi):
                 "content": "You will be provided with a set of characters, " +
                 "their description, and a scene in JSON format. " +
                 "Create dialogue using the plot and characters " +
-                "provided and return it in JSON format."
+                "provided and return it in JSON format. Add a narrator with key 'narrator' describing the characters, scene, and emotions"
             },
             {
                 "role": "user",
@@ -33,10 +33,7 @@ class TextChatGPT(TextAi):
             },
             {
                 "role": "assistant",
-                "content": '{"dialogue":[{"speaker":"Max Doe","message"' +
-                ':"Joana, I must say, your taste in bagels is utterly ' +
-                'appalling!"},{"speaker":"Joana Smith","message":' +
-                '"Max, you are right."}]};'
+                "content": '{"dialogue":[{"speaker":"narrator","message":"Max stood close to Joana."},{"speaker":"Max Doe","message":"Joana, I must say, your taste in bagels is utterly appalling!"},{"speaker":"Joana Smith","message":"Max, you are right."},{"speaker":"narrator","message":"Finally Max was happy."}]}'
             }

         ]
@@ -53,21 +50,49 @@ class TextChatGPT(TextAi):
         self.messages.append(
             {"role": "user", "content": json.dumps(self.user_message)}
         )
-        completion = openai.ChatCompletion.create(
-            model="gpt-3.5-turbo",
-            messages=self.messages,
-        )
-        # clear for next time
-        # self.messages = []
+
         self.user_message = {}
+        messages = self.messages
+        full_resp = ""

-        resp = completion.choices[0].message.content
-        self.messages.append(
-            {"role": "assistant", "content": resp}
-        )
-        logging.info("Recieved " + resp)
-        resp = json_trim(resp)
-        try:
-            return json.dumps(json.loads(resp))
-        except Exception:
-            return "{}"
+        count = 5
+        while count > 0:
+            count -= 1
+            completion = openai.ChatCompletion.create(
+                model="gpt-3.5-turbo",
+                messages=messages,
+            )
+
+            resp = completion.choices[0].message.content
+            finish_reason = completion.choices[0].finish_reason
+            full_resp += resp
+
+            logging.info(">> Recieved: " + resp)
+            logging.info(">> Have: " + full_resp)
+            logging.info(">> finish_reason: " + completion.choices[0].finish_reason)
+            if finish_reason == 'stop':
+                logging.info('chatgpt: got all info')
+                self.messages.append(
+                    {"role": "assistant", "content": full_resp}
+                )
+                try:
+                    resp = json_trim(resp)
+                    return json.dumps(json.loads(resp))
+                except Exception:
+                    return "{}"
+            elif finish_reason == 'length':
+                logging.info(f'chatgpt: need more info, retrying, usage:{completion.choices[0].usage} count:{count}')
+                messages += [
+                    {"role": "assistant", "content": ""},
+                ]
+            else:
+                raise Exception("finish reason is " + finish_reason)
+
+            ## DEBUG
+            try:
+                json.dumps(resp)
+            except Exception:
+                logging.critical("******* >> resp is not json but finish_reason: " + completion.choices[0].finish_reason)
+
+
+        raise Exception('Tried too many times to talk to ChatGPT')
diff --git a/ai_presenter/voice_ai/base.py b/ai_presenter/voice_ai/base.py
index f03562f..48a76e2 100644
--- a/ai_presenter/voice_ai/base.py
+++ b/ai_presenter/voice_ai/base.py
@@ -1,6 +1,6 @@
 from ai_presenter.database import Database
 from ai_presenter.config.voice import VoiceConfig
-from elevenlabs import Iterator, Voice
+from elevenlabs import Iterator, Voice, Voices
 import json
 import logging

@@ -39,26 +39,27 @@ class VoiceAI:

     def create_character_db(self, line: str):
         json_string = line.strip()
-        data = (json.loads(json_string))
+        data = json.loads(json_string)

         for message in data['dialogue']:
             name = message['speaker']
+
             if name not in self.characters:
+                logging.info(f"creating character {name}")
                 try:
                     character_config = VoiceConfig(name,
-                                                   self.actors[name].gender,
-                                                   self.actors[name].age,
-                                                   self.actors[name].accent,
-                                                   1.99,
-                                                   self.actors[name].
-                                                   description)
+                                                self.actors[name].gender,
+                                                self.actors[name].age,
+                                                self.actors[name].accent,
+                                                1.99,
+                                                self.actors[name].description)
                     self.characters[name] = self.new_actor(character_config)
                 except Exception:
                     character_config = VoiceConfig(name,
-                                                   'male',
-                                                   'middle_aged',
-                                                   "british",
-                                                   1.99,
-                                                   f"This is the {name}")
+                                                'male',
+                                                'middle_aged',
+                                                "british",
+                                                1.99,
+                                                f"This is the {name}")
                     self.characters[name] = self.new_actor(character_config)
         return data
diff --git a/ai_presenter/voice_ai/elevenlabs.py b/ai_presenter/voice_ai/elevenlabs.py
index 52aa8df..fb080ca 100644
--- a/ai_presenter/voice_ai/elevenlabs.py
+++ b/ai_presenter/voice_ai/elevenlabs.py
@@ -7,6 +7,21 @@ import logging
 import os


+class VoiceAIDefaultActorElevenLabs(VoiceAIActor):
+    def __init__(self, config: VoiceConfig, voice: Voice):
+        super().__init__(config)
+        self.voice = voice
+
+    # .says takes the message and generates audio from that message
+    # note: for the real voiceaiactor class, the elevenlabs generate
+    # methods return raw data called audio which can be manipulated before
+    # saving to a file(ie. concatenation)
+    def says(self, message) -> (bytes | Iterator[bytes]):
+        logging.info(f'{self.name} says {message}')
+        audio = generate(text=message, model="eleven_monolingual_v1",
+                         voice=self.voice)
+        return audio
+
 class VoiceAIActorElevenLabs(VoiceAIActor):
     def __init__(self, config: VoiceConfig):
         super().__init__(config)
@@ -16,7 +31,7 @@ class VoiceAIActorElevenLabs(VoiceAIActor):
             f'I am {self.name}. I am a {self.age} year old ' + \
             f'{self.gender} with a {self.accent} accent.'

-        logging.info(f"designing a voice for {self.name}")
+        logging.info(f"designing a voice for {self.name} gender:{self.gender}")
         self.voice_design = VoiceDesign(name=self.name,
                                         text=self.sample_text,
                                         gender=self.gender,
@@ -40,7 +55,12 @@ class ElevenLabs(VoiceAI):
     def __init__(self, db: Database):
         super().__init__(db)

-    def new_actor(self, config):
+    def new_actor(self, config) -> VoiceAIActor:
+        if config.name == 'narrator':
+            return VoiceAIDefaultActorElevenLabs(
+                config,
+                Voice(voice_id='idofvoice', name='narrator'),
+            )
         return VoiceAIActorElevenLabs(config)

     # make narrator actor
@@ -68,7 +88,6 @@ class ElevenLabs(VoiceAI):
                 for message in data['dialogue']:
                     name = message['speaker']
                     text = message['message']
-                    logging.info('ElevenLabs: Stitching together audio')
                     audio += self.characters[name].says(text)
         logging.info(f"ElevenLabs: Audio can be found in {output_file}")
         save(audio, output_file)
diff --git a/ai_presenter/voice_ai/fake.py b/ai_presenter/voice_ai/fake.py
index abee3bc..89a6c2e 100644
--- a/ai_presenter/voice_ai/fake.py
+++ b/ai_presenter/voice_ai/fake.py
@@ -10,7 +10,7 @@ class VoiceAIActorFake(VoiceAIActor):
     def __init__(self, config: VoiceConfig):
         super().__init__(config)

-    def says(self, message, emotion) -> (bytes | Iterator[bytes]):
+    def says(self, message) -> (bytes | Iterator[bytes]):
         # .says takes the message and generates audio from that message
         # this audio gets saved to a file
         # personally don't think says needs a file passed to it bc
@@ -18,7 +18,7 @@ class VoiceAIActorFake(VoiceAIActor):
         # methods return raw data called audio which can be manipulated before
         # saving to a file(ie. concatenation)
         logging.info(f'VoiceAIActorFake: {self.name} ' +
-                     f'says {message} in a {emotion} way')
+                     f'says {message}')

         audio = f'name: {self.name}\ngender: {self.gender}\n' + \
             f'age: {self.age}\naccent: {self.accent}\n' + \
@@ -27,11 +27,10 @@ class VoiceAIActorFake(VoiceAIActor):
                 f'message: {message}\n\n'
         return audio

-    def __get_voice(self, emotion) -> Voice:
+    def __get_voice(self) -> Voice:
         logging.info(f'I am {self.name}. I am a {self.age} year old ' +
                      f'{self.gender} with a {self.accent} accent. I am ' +
-                     f'currently speaking in a {emotion} tone because I' +
-                     f' am {emotion}')
+                     f'currently speaking')


 class VoiceAIFake(VoiceAI):
@@ -62,9 +61,8 @@ class VoiceAIFake(VoiceAI):
                 for message in data['dialogue']:
                     name = message['speaker']
                     text = message['message']
-                    emotion = message['emotion']
                     logging.info('VoiceAIFake: Stitching together audio')
-                    audio += self.characters[name].says(text, emotion)
+                    audio += self.characters[name].says(text)

         logging.info('VoiceAIFake: Generating audio file')
         with open(output_file, 'w') as out:
	diff --git a/.gitignore b/.gitignore
	index cd5cc6c..13466b5 100644
	--- a/.gitignore
	+++ b/.gitignore
	@@ -3,4 +3,7 @@ venv
	__pycache__
	*.pyc
	*.txt
	+*.wav
	+*.mp3
	!requirements.txt
	+*.diff
	diff --git a/ai_presenter/ai_presenter.py b/ai_presenter/ai_presenter.py
	index 18c6df5..d20e135 100644
	--- a/ai_presenter/ai_presenter.py
	+++ b/ai_presenter/ai_presenter.py
	@@ -19,7 +19,7 @@ class AIPresenter:
	textai = self.generator.get_text()
	for key, scene in self.database.scenes.items():
	logging.info(f"Working on scene: {scene.name} in " +
	- f"{scene.location}")
	+ f"{scene.location}")

	output = textai.generate(scene)
	file.write(output + '\n')
	diff --git a/ai_presenter/config/voice.py b/ai_presenter/config/voice.py
	index c4ea3bd..646243b 100644
	--- a/ai_presenter/config/voice.py
	+++ b/ai_presenter/config/voice.py
	@@ -2,8 +2,8 @@
	# VoiceConfig can have passed in name, gender, age,
	# accent, and accent_strength and have these initialized
	class VoiceConfig:
	- def __init__(self, name, gender, age, accent,
	- accent_strength, description):
	+ def __init__(self, name='', gender='', age='', accent='',
	+ accent_strength=0, description=''):
	self.name = name
	self.gender = gender
	self.age = age
	diff --git a/ai_presenter/database.py b/ai_presenter/database.py
	index 02e05d4..7d8110e 100644
	--- a/ai_presenter/database.py
	+++ b/ai_presenter/database.py
	@@ -1,3 +1,4 @@
	+import logging
	from ai_presenter.config.config import Config


	@@ -28,6 +29,7 @@ class Actor:
	self.height = data['height']
	self.gender = data['gender']
	self.accent = data['accent']
	+ logging.info(f'actor: name:{self.name} gender:{self.gender}')


	class Scene:
	diff --git a/ai_presenter/text_ai/chatgpt.py b/ai_presenter/text_ai/chatgpt.py
	index 1278faf..a69faad 100644
	--- a/ai_presenter/text_ai/chatgpt.py
	+++ b/ai_presenter/text_ai/chatgpt.py
	@@ -17,7 +17,7 @@ class TextChatGPT(TextAi):
	"content": "You will be provided with a set of characters, " +
	"their description, and a scene in JSON format. " +
	"Create dialogue using the plot and characters " +
	- "provided and return it in JSON format."
	+ "provided and return it in JSON format. Add a narrator with key 'narrator' describing the characters, scene, and emotions"
	},
	{
	"role": "user",
	@@ -33,10 +33,7 @@ class TextChatGPT(TextAi):
	},
	{
	"role": "assistant",
	- "content": '{"dialogue":[{"speaker":"Max Doe","message"' +
	- ':"Joana, I must say, your taste in bagels is utterly ' +
	- 'appalling!"},{"speaker":"Joana Smith","message":' +
	- '"Max, you are right."}]};'
	+ "content": '{"dialogue":[{"speaker":"narrator","message":"Max stood close to Joana."},{"speaker":"Max Doe","message":"Joana, I must say, your taste in bagels is utterly appalling!"},{"speaker":"Joana Smith","message":"Max, you are right."},{"speaker":"narrator","message":"Finally Max was happy."}]}'
	}

	]
	@@ -53,21 +50,49 @@ class TextChatGPT(TextAi):
	self.messages.append(
	{"role": "user", "content": json.dumps(self.user_message)}
	)
	- completion = openai.ChatCompletion.create(
	- model="gpt-3.5-turbo",
	- messages=self.messages,
	- )
	- # clear for next time
	- # self.messages = []
	+
	self.user_message = {}
	+ messages = self.messages
	+ full_resp = ""

	- resp = completion.choices[0].message.content
	- self.messages.append(
	- {"role": "assistant", "content": resp}
	- )
	- logging.info("Recieved " + resp)
	- resp = json_trim(resp)
	- try:
	- return json.dumps(json.loads(resp))
	- except Exception:
	- return "{}"
	+ count = 5
	+ while count > 0:
	+ count -= 1
	+ completion = openai.ChatCompletion.create(
	+ model="gpt-3.5-turbo",
	+ messages=messages,
	+ )
	+
	+ resp = completion.choices[0].message.content
	+ finish_reason = completion.choices[0].finish_reason
	+ full_resp += resp
	+
	+ logging.info(">> Recieved: " + resp)
	+ logging.info(">> Have: " + full_resp)
	+ logging.info(">> finish_reason: " + completion.choices[0].finish_reason)
	+ if finish_reason == 'stop':
	+ logging.info('chatgpt: got all info')
	+ self.messages.append(
	+ {"role": "assistant", "content": full_resp}
	+ )
	+ try:
	+ resp = json_trim(resp)
	+ return json.dumps(json.loads(resp))
	+ except Exception:
	+ return "{}"
	+ elif finish_reason == 'length':
	+ logging.info(f'chatgpt: need more info, retrying, usage:{completion.choices[0].usage} count:{count}')
	+ messages += [
	+ {"role": "assistant", "content": ""},
	+ ]
	+ else:
	+ raise Exception("finish reason is " + finish_reason)
	+
	+ ## DEBUG
	+ try:
	+ json.dumps(resp)
	+ except Exception:
	+ logging.critical("******* >> resp is not json but finish_reason: " + completion.choices[0].finish_reason)
	+
	+
	+ raise Exception('Tried too many times to talk to ChatGPT')
	diff --git a/ai_presenter/voice_ai/base.py b/ai_presenter/voice_ai/base.py
	index f03562f..48a76e2 100644
	--- a/ai_presenter/voice_ai/base.py
	+++ b/ai_presenter/voice_ai/base.py
	@@ -1,6 +1,6 @@
	from ai_presenter.database import Database
	from ai_presenter.config.voice import VoiceConfig
	-from elevenlabs import Iterator, Voice
	+from elevenlabs import Iterator, Voice, Voices
	import json
	import logging

	@@ -39,26 +39,27 @@ class VoiceAI:

	def create_character_db(self, line: str):
	json_string = line.strip()
	- data = (json.loads(json_string))
	+ data = json.loads(json_string)

	for message in data['dialogue']:
	name = message['speaker']
	+
	if name not in self.characters:
	+ logging.info(f"creating character {name}")
	try:
	character_config = VoiceConfig(name,
	- self.actors[name].gender,
	- self.actors[name].age,
	- self.actors[name].accent,
	- 1.99,
	- self.actors[name].
	- description)
	+ self.actors[name].gender,
	+ self.actors[name].age,
	+ self.actors[name].accent,
	+ 1.99,
	+ self.actors[name].description)
	self.characters[name] = self.new_actor(character_config)
	except Exception:
	character_config = VoiceConfig(name,
	- 'male',
	- 'middle_aged',
	- "british",
	- 1.99,
	- f"This is the {name}")
	+ 'male',
	+ 'middle_aged',
	+ "british",
	+ 1.99,
	+ f"This is the {name}")
	self.characters[name] = self.new_actor(character_config)
	return data
	diff --git a/ai_presenter/voice_ai/elevenlabs.py b/ai_presenter/voice_ai/elevenlabs.py
	index 52aa8df..fb080ca 100644
	--- a/ai_presenter/voice_ai/elevenlabs.py
	+++ b/ai_presenter/voice_ai/elevenlabs.py
	@@ -7,6 +7,21 @@ import logging
	import os


	+class VoiceAIDefaultActorElevenLabs(VoiceAIActor):
	+ def __init__(self, config: VoiceConfig, voice: Voice):
	+ super().__init__(config)
	+ self.voice = voice
	+
	+ # .says takes the message and generates audio from that message
	+ # note: for the real voiceaiactor class, the elevenlabs generate
	+ # methods return raw data called audio which can be manipulated before
	+ # saving to a file(ie. concatenation)
	+ def says(self, message) -> (bytes | Iterator[bytes]):
	+ logging.info(f'{self.name} says {message}')
	+ audio = generate(text=message, model="eleven_monolingual_v1",
	+ voice=self.voice)
	+ return audio
	+
	class VoiceAIActorElevenLabs(VoiceAIActor):
	def __init__(self, config: VoiceConfig):
	super().__init__(config)
	@@ -16,7 +31,7 @@ class VoiceAIActorElevenLabs(VoiceAIActor):
	f'I am {self.name}. I am a {self.age} year old ' + \
	f'{self.gender} with a {self.accent} accent.'

	- logging.info(f"designing a voice for {self.name}")
	+ logging.info(f"designing a voice for {self.name} gender:{self.gender}")
	self.voice_design = VoiceDesign(name=self.name,
	text=self.sample_text,
	gender=self.gender,
	@@ -40,7 +55,12 @@ class ElevenLabs(VoiceAI):
	def __init__(self, db: Database):
	super().__init__(db)

	- def new_actor(self, config):
	+ def new_actor(self, config) -> VoiceAIActor:
	+ if config.name == 'narrator':
	+ return VoiceAIDefaultActorElevenLabs(
	+ config,
	+ Voice(voice_id='idofvoice', name='narrator'),
	+ )
	return VoiceAIActorElevenLabs(config)

	# make narrator actor
	@@ -68,7 +88,6 @@ class ElevenLabs(VoiceAI):
	for message in data['dialogue']:
	name = message['speaker']
	text = message['message']
	- logging.info('ElevenLabs: Stitching together audio')
	audio += self.characters[name].says(text)
	logging.info(f"ElevenLabs: Audio can be found in {output_file}")
	save(audio, output_file)
	diff --git a/ai_presenter/voice_ai/fake.py b/ai_presenter/voice_ai/fake.py
	index abee3bc..89a6c2e 100644
	--- a/ai_presenter/voice_ai/fake.py
	+++ b/ai_presenter/voice_ai/fake.py
	@@ -10,7 +10,7 @@ class VoiceAIActorFake(VoiceAIActor):
	def __init__(self, config: VoiceConfig):
	super().__init__(config)

	- def says(self, message, emotion) -> (bytes | Iterator[bytes]):
	+ def says(self, message) -> (bytes | Iterator[bytes]):
	# .says takes the message and generates audio from that message
	# this audio gets saved to a file
	# personally don't think says needs a file passed to it bc
	@@ -18,7 +18,7 @@ class VoiceAIActorFake(VoiceAIActor):
	# methods return raw data called audio which can be manipulated before
	# saving to a file(ie. concatenation)
	logging.info(f'VoiceAIActorFake: {self.name} ' +
	- f'says {message} in a {emotion} way')
	+ f'says {message}')

	audio = f'name: {self.name}\ngender: {self.gender}\n' + \
	f'age: {self.age}\naccent: {self.accent}\n' + \
	@@ -27,11 +27,10 @@ class VoiceAIActorFake(VoiceAIActor):
	f'message: {message}\n\n'
	return audio

	- def __get_voice(self, emotion) -> Voice:
	+ def __get_voice(self) -> Voice:
	logging.info(f'I am {self.name}. I am a {self.age} year old ' +
	f'{self.gender} with a {self.accent} accent. I am ' +
	- f'currently speaking in a {emotion} tone because I' +
	- f' am {emotion}')
	+ f'currently speaking')


	class VoiceAIFake(VoiceAI):
	@@ -62,9 +61,8 @@ class VoiceAIFake(VoiceAI):
	for message in data['dialogue']:
	name = message['speaker']
	text = message['message']
	- emotion = message['emotion']
	logging.info('VoiceAIFake: Stitching together audio')
	- audio += self.characters[name].says(text, emotion)
	+ audio += self.characters[name].says(text)

	logging.info('VoiceAIFake: Generating audio file')
	with open(output_file, 'w') as out: