Skip to content

Commit

Permalink
Update demo to match CLI
Browse files (browse the repository at this point in the history)
  • Loading branch information
daavoo committed Dec 10, 2024
1 parent 5d17b19 commit bd56198
Showing 1 changed file with 13 additions and 26 deletions.
39 changes: 13 additions & 26 deletions demo/app.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import re
from pathlib import Path

@@ -8,34 +9,11 @@
load_llama_cpp_model,
load_parler_tts_model_and_tokenizer,
)
from document_to_podcast.config import DEFAULT_PROMPT, DEFAULT_SPEAKERS
from document_to_podcast.inference.text_to_speech import text_to_speech
from document_to_podcast.inference.text_to_text import text_to_text_stream


PODCAST_PROMPT = """
You are a podcast scriptwriter generating engaging and natural-sounding conversations in JSON format. The script features two speakers:
Speaker 1: Laura, the main host. She explains topics clearly using anecdotes and analogies, teaching in an engaging and captivating way.
Speaker 2: Jon, the co-host. He keeps the conversation on track, asks curious follow-up questions, and reacts with excitement or confusion, often using interjections like “hmm” or “umm.”
Instructions:
- Write dynamic, easy-to-follow dialogue.
- Include natural interruptions and interjections.
- Avoid repetitive phrasing between speakers.
- Format output as a JSON conversation.
Example:
{
"Speaker 1": "Welcome to our podcast! Today, we're exploring...",
"Speaker 2": "Hi Laura! I'm excited to hear about this. Can you explain...",
"Speaker 1": "Sure! Imagine it like this...",
"Speaker 2": "Oh, that's cool! But how does..."
}
"""

SPEAKER_DESCRIPTIONS = {
"1": "Laura's voice is exciting and fast in delivery with very clear audio and no background noise.",
"2": "Jon's voice is calm with very clear audio and no background noise.",
}


@st.cache_resource
def load_text_to_text_model():
return load_llama_cpp_model(
@@ -99,9 +77,13 @@ def load_text_to_speech_model_and_tokenizer():
st.divider()
st.header("Podcast generation")

system_prompt = st.text_area("Podcast generation prompt", value=PODCAST_PROMPT)
speakers = st.text_area("Speaker configuration", value=DEFAULT_SPEAKERS)

if st.button("Generate Podcast"):
speakers = json.loads(speakers)
system_prompt = DEFAULT_PROMPT.replace(
"{SPEAKERS}", "\n".join(str(speaker) for speaker in speakers)
)
with st.spinner("Generating Podcast..."):
text = ""
for chunk in text_to_text_stream(
@@ -111,12 +93,17 @@ def load_text_to_speech_model_and_tokenizer():
if text.endswith("\n") and "Speaker" in text:
st.write(text)
speaker_id = re.search(r"Speaker (\d+)", text).group(1)
tone = next(
speaker["tone"]
for speaker in speakers
if speaker["id"] == int(speaker_id)
)
with st.spinner("Generating Audio..."):
speech = text_to_speech(
text.split(f'"Speaker {speaker_id}":')[-1],
speech_model,
speech_tokenizer,
SPEAKER_DESCRIPTIONS[speaker_id],
tone,
)
st.audio(speech, sample_rate=speech_model.config.sampling_rate)
text = ""

0 comments on commit bd56198

Please sign in to comment.