Nihal2000 committed on
Commit ac06f3e · verified · 1 Parent(s): e56613a

Update services/podcast_generator_service.py

Files changed (1):
  1. services/podcast_generator_service.py +294 -247
services/podcast_generator_service.py CHANGED
@@ -1,12 +1,10 @@
  import logging
- import asyncio
- import json
- import uuid
  from typing import List, Dict, Any, Optional
  from dataclasses import dataclass, asdict
  from datetime import datetime
  from pathlib import Path
  import re
+ import uuid

  try:
      from elevenlabs import VoiceSettings
@@ -18,15 +16,16 @@ except ImportError:
  import config
  from services.llamaindex_service import LlamaIndexService
  from services.llm_service import LLMService
+ from services.document_store_service import DocumentStoreService

  logger = logging.getLogger(__name__)

  @dataclass
  class DocumentAnalysis:
      """Analysis results from document(s)"""
-     key_insights: List[str] # 5-7 main points
+     key_insights: List[str]
      topics: List[str]
-     complexity_level: str # beginner, intermediate, advanced
+     complexity_level: str
      estimated_words: int
      source_documents: List[str]
      summary: str
@@ -34,10 +33,10 @@ class DocumentAnalysis:
  @dataclass
  class DialogueLine:
      """Single line of podcast dialogue"""
-     speaker: str # "HOST1" or "HOST2"
+     speaker: str
      text: str
-     pause_after: float = 0.5 # seconds
+     pause_after: float = 0.5
-
+
  @dataclass
  class PodcastScript:
      """Complete podcast script"""
@@ -47,7 +46,6 @@ class PodcastScript:
      style: str

      def to_text(self) -> str:
-         """Convert to readable transcript"""
          lines = []
          for line in self.dialogue:
              lines.append(f"{line.speaker}: {line.text}")
@@ -83,98 +81,94 @@ class PodcastResult:
  class PodcastGeneratorService:
      """
      Service for generating conversational podcasts from documents.
-     Combines LlamaIndex for analysis and ElevenLabs for voice synthesis.
      """

-     # Word count per minute for podcast pacing
      WORDS_PER_MINUTE = 150

-     # Script generation prompts for different styles
      SCRIPT_PROMPTS = {
-         "conversational": """You are an expert podcast script writer. Create an engaging 2-host podcast discussing insights from documents.
+         "conversational": """You are an expert podcast script writer. Create an engaging 2-host podcast discussing the provided documents.

- CONTEXT:
- {analysis}
+ DOCUMENT CONTENT:
+ {document_content}
+
+ KEY INSIGHTS:
+ {key_insights}

  REQUIREMENTS:
  - Duration: {duration_minutes} minutes (approximately {word_count} words)
  - Style: Conversational, friendly, and accessible
  - Format: Alternating dialogue between HOST1 and HOST2
- - Include natural transitions, questions, and "aha!" moments
- - Make complex topics easy to understand
- - Add enthusiasm and genuine curiosity
- - Balance speaking time between both hosts
+ - Make the content engaging and easy to understand
+ - Include natural transitions and enthusiasm

  DIALOGUE FORMAT (strictly follow):
  HOST1: [What they say]
  HOST2: [What they say]

  STRUCTURE:
- 1. Opening Hook (30 seconds): Grab attention with an intriguing question or fact
- 2. Introduction (1 minute): Set context and preview what's coming
- 3. Main Discussion (70% of time): Deep dive into key insights
- 4. Wrap-up (1 minute): Summarize key takeaways and final thoughts
-
- TONE: Friendly, enthusiastic, educational but not condescending
+ 1. Opening Hook (30 seconds): Grab attention
+ 2. Introduction (1 minute): Set context
+ 3. Main Discussion (70% of time): Deep dive into insights
+ 4. Wrap-up (1 minute): Summarize key takeaways

  Generate the complete podcast script now:""",

-         "educational": """You are creating an educational podcast script. Two hosts discuss document insights in a clear, instructive manner.
+         "educational": """Create an educational podcast discussing the provided documents.

- CONTEXT:
- {analysis}
+ DOCUMENT CONTENT:
+ {document_content}
+
+ KEY INSIGHTS:
+ {key_insights}

  REQUIREMENTS:
  - Duration: {duration_minutes} minutes (approximately {word_count} words)
  - Style: Clear, methodical, educational
- - HOST1 acts as the teacher/expert, HOST2 as the curious learner
- - Include explanations of complex concepts
- - Use examples and analogies
- - Build knowledge progressively
+ - HOST1 acts as teacher, HOST2 as curious learner

  DIALOGUE FORMAT:
  HOST1: [Expert explanation]
- HOST2: [Clarifying question or observation]
+ HOST2: [Clarifying question]

- Generate the complete educational podcast script now:""",
+ Generate the educational podcast script now:""",

-         "technical": """You are writing a technical podcast for an informed audience. Discuss document insights with precision and depth.
+         "technical": """Create a technical podcast for an informed audience.
+
+ DOCUMENT CONTENT:
+ {document_content}

- CONTEXT:
- {analysis}
+ KEY INSIGHTS:
+ {key_insights}

  REQUIREMENTS:
  - Duration: {duration_minutes} minutes (approximately {word_count} words)
  - Style: Professional, detailed, technically accurate
- - HOST1 is the subject matter expert, HOST2 is an informed interviewer
- - Use proper technical terminology
- - Dive into implementation details
- - Discuss implications and applications
+ - HOST1 is expert, HOST2 is informed interviewer

  DIALOGUE FORMAT:
  HOST1: [Technical insight]
  HOST2: [Probing question]

- Generate the complete technical podcast script now:""",
+ Generate the technical podcast script now:""",

-         "casual": """You are creating a fun, casual podcast. Two friends discuss interesting ideas from documents.
+         "casual": """Create a fun, casual podcast discussing the documents.
+
+ DOCUMENT CONTENT:
+ {document_content}

- CONTEXT:
- {analysis}
+ KEY INSIGHTS:
+ {key_insights}

  REQUIREMENTS:
  - Duration: {duration_minutes} minutes (approximately {word_count} words)
  - Style: Relaxed, humorous, energetic
- - Both hosts are enthusiastic and engaged
- - Use casual language and occasional humor
- - Make it entertaining while staying informative
- - Quick pacing with energy
+ - Make it entertaining while informative

  DIALOGUE FORMAT:
  HOST1: [Casual commentary]
  HOST2: [Enthusiastic response]

- Generate the complete casual podcast script now:"""
+ Generate the casual podcast script now:"""
      }

      def __init__(
@@ -183,18 +177,13 @@ Generate the complete casual podcast script now:"""
          llm_service: LLMService,
          elevenlabs_api_key: Optional[str] = None
      ):
-         """
-         Initialize podcast generator service
-
-         Args:
-             llamaindex_service: Service for document analysis
-             llm_service: Service for script generation
-             elevenlabs_api_key: ElevenLabs API key (uses config if not provided)
-         """
          self.config = config.config
          self.llamaindex_service = llamaindex_service
          self.llm_service = llm_service

+         # Get document store from llamaindex service
+         self.document_store = llamaindex_service.document_store
+
          # Initialize ElevenLabs client
          self.elevenlabs_client = None
          if ELEVENLABS_AVAILABLE:
@@ -213,10 +202,14 @@ Generate the complete casual podcast script now:"""
          # Metadata database file
          self.metadata_file = self.podcast_dir / "metadata_db.json"
          self._ensure_metadata_db()
+
+         # Voice cache
+         self._voice_cache = {}

      def _ensure_metadata_db(self):
          """Ensure metadata database exists"""
          if not self.metadata_file.exists():
+             import json
              self.metadata_file.write_text(json.dumps([], indent=2))

      async def generate_podcast(
@@ -227,19 +220,7 @@ Generate the complete casual podcast script now:"""
          host1_voice: str = "Rachel",
          host2_voice: str = "Adam"
      ) -> PodcastResult:
-         """
-         Generate a complete podcast from documents
-
-         Args:
-             document_ids: List of document IDs to analyze
-             style: Podcast style (conversational, educational, technical, casual)
-             duration_minutes: Target duration in minutes
-             host1_voice: Voice name for first host
-             host2_voice: Voice name for second host
-
-         Returns:
-             PodcastResult with audio file path and metadata
-         """
+         """Generate a complete podcast from documents"""
          start_time = datetime.now()
          podcast_id = str(uuid.uuid4())

@@ -247,8 +228,8 @@ Generate the complete casual podcast script now:"""
          logger.info(f"Starting podcast generation {podcast_id}")
          logger.info(f"Documents: {document_ids}, Style: {style}, Duration: {duration_minutes}min")

-         # Step 1: Analyze documents
-         logger.info("Step 1: Analyzing documents...")
+         # Step 1: Retrieve and analyze documents
+         logger.info("Step 1: Retrieving and analyzing documents...")
          analysis = await self.analyze_documents(document_ids)

          # Step 2: Generate script
@@ -311,57 +292,133 @@ Generate the complete casual podcast script now:"""

      async def analyze_documents(self, document_ids: List[str]) -> DocumentAnalysis:
          """
-         Analyze documents to extract key insights for podcast
-
-         Args:
-             document_ids: List of document IDs
+         Retrieve documents and extract key insights for podcast

-         Returns:
-             DocumentAnalysis with key insights and topics
+         FIXED: Now actually retrieves document content from document store
          """
-         # Create analysis query for the agentic RAG
-         analysis_query = f"""Analyze the following documents and provide:
- 1. The 5-7 most important insights or key points
+         try:
+             # Step 1: Retrieve actual documents from document store
+             logger.info(f"Retrieving {len(document_ids)} documents from store...")
+             documents = []
+             document_contents = []
+
+             for doc_id in document_ids:
+                 doc = await self.document_store.get_document(doc_id)
+                 if doc:
+                     documents.append(doc)
+                     document_contents.append(doc.content)
+                     logger.info(f"Retrieved document: {doc.filename} ({len(doc.content)} chars)")
+                 else:
+                     logger.warning(f"Document {doc_id} not found in store")
+
+             if not documents:
+                 raise ValueError(f"No documents found for IDs: {document_ids}")
+
+             # Step 2: Combine document content
+             combined_content = "\n\n---DOCUMENT SEPARATOR---\n\n".join(document_contents)
+
+             # Truncate if too long (keep first portion for context)
+             max_content_length = 15000  # Adjust based on your LLM context window
+             if len(combined_content) > max_content_length:
+                 logger.warning(f"Content too long ({len(combined_content)} chars), truncating to {max_content_length}")
+                 combined_content = combined_content[:max_content_length] + "\n\n[Content truncated...]"
+
+             # Step 3: Use LLM to analyze the content
+             analysis_prompt = f"""Analyze the following document(s) and provide:
+
+ 1. The 5-7 most important insights or key points (be specific and detailed)
  2. Main themes and topics covered
  3. The overall complexity level (beginner/intermediate/advanced)
- 4. A brief summary suitable for podcast discussion
+ 4. A comprehensive summary suitable for podcast discussion

- Document IDs: {', '.join(document_ids)}
+ DOCUMENTS:
+ {combined_content}

- Provide a structured analysis optimized for creating an engaging podcast discussion."""
+ Provide a structured analysis optimized for creating an engaging podcast discussion.
+ Format your response as:
+
+ KEY INSIGHTS:
+ 1. [First key insight]
+ 2. [Second key insight]
+ ...
+
+ TOPICS:
+ - [Topic 1]
+ - [Topic 2]
+ ...
+
+ COMPLEXITY: [beginner/intermediate/advanced]
+
+ SUMMARY:
+ [Your comprehensive summary here]
+ """

-         # Use LlamaIndex agentic RAG for analysis
-         result = await self.llamaindex_service.query(analysis_query)
-
-         # Parse the result to extract structured information
-         # This is a simplified parser - in production, you might want more robust parsing
-         insights = self._extract_insights(result)
-         topics = self._extract_topics(result)
-         complexity = self._determine_complexity(result)
-
-         return DocumentAnalysis(
-             key_insights=insights[:7], # Limit to 7
-             topics=topics,
-             complexity_level=complexity,
-             estimated_words=len(result.split()),
-             source_documents=document_ids,
-             summary=result
-         )
+             logger.info("Analyzing content with LLM...")
+             result = await self.llm_service.generate_text(
+                 analysis_prompt,
+                 max_tokens=2000,
+                 temperature=0.7
+             )
+
+             # Step 4: Parse the structured response
+             insights = self._extract_insights(result)
+             topics = self._extract_topics(result)
+             complexity = self._determine_complexity(result)
+             summary = self._extract_summary(result)
+
+             logger.info(f"Analysis complete: {len(insights)} insights, {len(topics)} topics")
+
+             return DocumentAnalysis(
+                 key_insights=insights[:7],
+                 topics=topics,
+                 complexity_level=complexity,
+                 estimated_words=len(combined_content.split()),
+                 source_documents=[doc.filename for doc in documents],
+                 summary=summary or result[:500]
+             )
+
+         except Exception as e:
+             logger.error(f"Document analysis failed: {str(e)}", exc_info=True)
+             raise RuntimeError(f"Failed to analyze documents: {str(e)}")
+
+     def _extract_summary(self, text: str) -> str:
+         """Extract summary section from analysis"""
+         try:
+             if "SUMMARY:" in text:
+                 parts = text.split("SUMMARY:")
+                 if len(parts) > 1:
+                     summary = parts[1].strip()
+                     # Take first 500 chars if too long
+                     return summary[:500] if len(summary) > 500 else summary
+         except:
+             pass
+
+         # Fallback: take first few sentences
+         sentences = text.split('.')
+         return '. '.join(sentences[:3]) + '.'

      def _extract_insights(self, text: str) -> List[str]:
          """Extract key insights from analysis text"""
          insights = []
-         # Simple extraction based on numbered lists or bullet points
          lines = text.split('\n')
+
+         in_insights_section = False
          for line in lines:
              line = line.strip()
-             # Match patterns like "1.", "2.", "-", "*", "•"
-             if re.match(r'^\d+\.|\-|\*|•', line):
-                 insight = re.sub(r'^\d+\.|\-|\*|•', '', line).strip()
-                 if len(insight) > 20: # Ensure it's substantial
-                     insights.append(insight)
+
+             if "KEY INSIGHTS:" in line.upper():
+                 in_insights_section = True
+                 continue
+             elif line.upper().startswith(("TOPICS:", "COMPLEXITY:", "SUMMARY:")):
+                 in_insights_section = False
+
+             if in_insights_section and line:
+                 # Match patterns like "1.", "2.", "-", "*", "•"
+                 insight = re.sub(r'^\d+\.|\-|\*|•', '', line).strip()
+                 if len(insight) > 20:
+                     insights.append(insight)

-         # If no insights found, create from first few sentences
+         # Fallback if no insights found
          if not insights:
              sentences = text.split('.')
              insights = [s.strip() + '.' for s in sentences[:7] if len(s.strip()) > 20]
@@ -370,25 +427,50 @@ Provide a structured analysis optimized for creating an engaging podcast discussion.

      def _extract_topics(self, text: str) -> List[str]:
          """Extract main topics from analysis"""
-         # Simple keyword extraction - could be enhanced with NLP
-         common_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
-         words = text.lower().split()
-         word_freq = {}
-
-         for word in words:
-             word = re.sub(r'[^\w\s]', '', word)
-             if len(word) > 4 and word not in common_words:
-                 word_freq[word] = word_freq.get(word, 0) + 1
-
-         # Get top topics
-         topics = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:5]
-         return [topic[0].title() for topic in topics]
+         topics = []
+         lines = text.split('\n')
+
+         in_topics_section = False
+         for line in lines:
+             line = line.strip()
+
+             if "TOPICS:" in line.upper():
+                 in_topics_section = True
+                 continue
+             elif line.upper().startswith(("KEY INSIGHTS:", "COMPLEXITY:", "SUMMARY:")):
+                 in_topics_section = False
+
+             if in_topics_section and line:
+                 topic = re.sub(r'^\-|\*|•', '', line).strip()
+                 if len(topic) > 2:
+                     topics.append(topic)
+
+         # Fallback: simple keyword extraction
+         if not topics:
+             common_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
+             words = text.lower().split()
+             word_freq = {}
+
+             for word in words:
+                 word = re.sub(r'[^\w\s]', '', word)
+                 if len(word) > 4 and word not in common_words:
+                     word_freq[word] = word_freq.get(word, 0) + 1
+
+             top_topics = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:5]
+             topics = [topic[0].title() for topic in top_topics]
+
+         return topics[:5]

      def _determine_complexity(self, text: str) -> str:
          """Determine content complexity level"""
          text_lower = text.lower()

-         # Simple heuristic based on keywords
+         if "complexity:" in text_lower:
+             for level in ["beginner", "intermediate", "advanced"]:
+                 if level in text_lower.split("complexity:")[1][:100]:
+                     return level
+
+         # Heuristic based on keywords
          if any(word in text_lower for word in ['basic', 'introduction', 'beginner', 'simple']):
              return "beginner"
          elif any(word in text_lower for word in ['advanced', 'complex', 'sophisticated', 'expert']):
@@ -402,59 +484,42 @@ Provide a structured analysis optimized for creating an engaging podcast discussion.
          style: str,
          duration_minutes: int
      ) -> PodcastScript:
-         """
-         Generate podcast script from analysis
-
-         Args:
-             analysis: Document analysis results
-             style: Podcast style
-             duration_minutes: Target duration
-
-         Returns:
-             Complete podcast script
-         """
-         # Calculate target word count
+         """Generate podcast script from analysis"""
          target_words = duration_minutes * self.WORDS_PER_MINUTE

-         # Prepare analysis context
-         analysis_context = f"""
- KEY INSIGHTS:
- {chr(10).join(f"{i+1}. {insight}" for i, insight in enumerate(analysis.key_insights))}
-
- TOPICS: {', '.join(analysis.topics)}
- COMPLEXITY: {analysis.complexity_level}
-
- SUMMARY:
- {analysis.summary[:500]}...
- """
+         # Prepare context with insights
+         insights_text = "\n".join(f"{i+1}. {insight}" for i, insight in enumerate(analysis.key_insights))

-         # Get prompt template for style
+         # Get prompt template
          prompt_template = self.SCRIPT_PROMPTS.get(style, self.SCRIPT_PROMPTS["conversational"])

-         # Fill in the template
+         # Fill template
          prompt = prompt_template.format(
-             analysis=analysis_context,
+             document_content=analysis.summary,
+             key_insights=insights_text,
              duration_minutes=duration_minutes,
              word_count=target_words
          )

-         # Generate script using LLM
+         # Generate script
          script_text = await self.llm_service.generate_text(
              prompt,
-             max_tokens=target_words * 2, # Give room for generation
-             temperature=0.8 # More creative
+             max_tokens=target_words * 2,
+             temperature=0.8
          )

-         # Parse script into dialogue lines
+         # Parse into dialogue
          dialogue = self._parse_script(script_text)

-         # Calculate actual word count and duration
+         if not dialogue:
+             raise ValueError("Failed to parse script into dialogue lines")
+
          word_count = sum(len(line.text.split()) for line in dialogue)
          duration_estimate = word_count / self.WORDS_PER_MINUTE

          return PodcastScript(
              dialogue=dialogue,
-             total_duration_estimate=duration_estimate * 60, # Convert to seconds
+             total_duration_estimate=duration_estimate * 60,
              word_count=word_count,
              style=style
          )
@@ -469,7 +534,6 @@ SUMMARY:
              if not line:
                  continue

-             # Match "HOST1:" or "HOST2:" format
              if line.startswith('HOST1:'):
                  text = line[6:].strip()
                  if text:
@@ -482,43 +546,34 @@ SUMMARY:
          return dialogue

      def _get_voice_id(self, voice_name: str) -> str:
-         """
-         Get voice ID from voice name.
-         Falls back to first available voice if not found.
-
-         Args:
-             voice_name: Voice name (e.g., "Rachel", "Adam")
-
-         Returns:
-             Voice ID string
-         """
+         """Get voice ID from voice name"""
          try:
-             # Try to get voices and find by name
-             voices = self.elevenlabs_client.voices.get_all()
-
-             if not voices or not voices.voices:
-                 logger.error("No voices available from ElevenLabs")
-                 raise RuntimeError("No voices available")
-
-             # First, try exact name match
-             for voice in voices.voices:
-                 if voice.name.lower() == voice_name.lower():
-                     logger.info(f"Found exact voice match for '{voice_name}': {voice.voice_id}")
-                     return voice.voice_id
-
-             # Try partial match
-             for voice in voices.voices:
-                 if voice_name.lower() in voice.name.lower():
-                     logger.info(f"Found partial voice match for '{voice_name}': {voice.name} ({voice.voice_id})")
-                     return voice.voice_id
-
-             # Use first available voice as fallback
-             first_voice = voices.voices[0]
-             logger.warning(f"Voice '{voice_name}' not found, using first available voice: {first_voice.name} ({first_voice.voice_id})")
-             return first_voice.voice_id
+             # Use cache if available
+             if not self._voice_cache:
+                 voices = self.elevenlabs_client.voices.get_all()
+                 if not voices or not voices.voices:
+                     raise RuntimeError("No voices available")
+
+                 for voice in voices.voices:
+                     self._voice_cache[voice.name.lower()] = voice.voice_id
+
+             # Exact match
+             if voice_name.lower() in self._voice_cache:
+                 return self._voice_cache[voice_name.lower()]
+
+             # Partial match
+             for name, voice_id in self._voice_cache.items():
+                 if voice_name.lower() in name:
+                     logger.info(f"Partial match for '{voice_name}': {name}")
+                     return voice_id
+
+             # Fallback
+             first_voice_id = list(self._voice_cache.values())[0]
+             logger.warning(f"Voice '{voice_name}' not found, using default")
+             return first_voice_id

          except Exception as e:
-             logger.error(f"Could not fetch voices: {e}", exc_info=True)
+             logger.error(f"Could not fetch voices: {e}")
              raise RuntimeError(f"Failed to get voice ID: {str(e)}")

      async def synthesize_audio(
@@ -528,60 +583,63 @@ SUMMARY:
          host1_voice: str,
          host2_voice: str
      ) -> Path:
-         """
-         Synthesize audio from script using ElevenLabs
-
-         Args:
-             podcast_id: Unique podcast ID
-             script: Podcast script
-             host1_voice: Voice for HOST1
-             host2_voice: Voice for HOST2
-
-         Returns:
-             Path to generated MP3 file
-         """
+         """Synthesize audio with alternating voices"""
          if not self.elevenlabs_client:
              raise RuntimeError("ElevenLabs client not initialized")

          audio_file = self.podcast_dir / f"{podcast_id}.mp3"

-         # For now, create a simple text-to-speech for the full script
-         # In production, you'd combine segments with pauses
-         full_text = script.to_text()
-
-         # Get actual voice ID
-         voice_id = self._get_voice_id(host1_voice)
-
          try:
-             # Use modern ElevenLabs TTS API
-             # Note: This is a simplified version using single voice
-             # Full implementation would process each line separately with different voices
+             # Get voice IDs
+             host1_voice_id = self._get_voice_id(host1_voice)
+             host2_voice_id = self._get_voice_id(host2_voice)

-             logger.info(f"Generating audio with voice: {host1_voice}")
+             logger.info(f"HOST1: {host1_voice}, HOST2: {host2_voice}")

-             # Use the modern text_to_speech API
-             audio_generator = self.elevenlabs_client.text_to_speech.convert(
-                 voice_id=voice_id, # Using resolved voice ID
-                 text=full_text,
-                 model_id="eleven_multilingual_v2"
-             )
+             voice_map = {
+                 "HOST1": host1_voice_id,
+                 "HOST2": host2_voice_id
+             }

-             # Write audio chunks to file
-             with open(audio_file, 'wb') as f:
+             audio_chunks = []
+
+             # Process each line with correct voice
+             for i, line in enumerate(script.dialogue):
+                 logger.info(f"Line {i+1}/{len(script.dialogue)}: {line.speaker}")
+
+                 voice_id = voice_map.get(line.speaker, host1_voice_id)
+
+                 audio_generator = self.elevenlabs_client.text_to_speech.convert(
+                     voice_id=voice_id,
+                     text=line.text,
+                     model_id="eleven_multilingual_v2"
+                 )
+
+                 line_chunks = []
                  for chunk in audio_generator:
                      if chunk:
-                         f.write(chunk)
+                         line_chunks.append(chunk)
+
+                 if line_chunks:
+                     audio_chunks.append(b''.join(line_chunks))
+
+             if not audio_chunks:
+                 raise RuntimeError("No audio chunks generated")
+
+             full_audio = b''.join(audio_chunks)
+
+             with open(audio_file, 'wb') as f:
+                 f.write(full_audio)

-             # Verify file was created with content
              if audio_file.exists() and audio_file.stat().st_size > 1000:
-                 logger.info(f"Audio synthesized successfully: {audio_file} ({audio_file.stat().st_size} bytes)")
+                 logger.info(f"Audio created: {audio_file} ({audio_file.stat().st_size} bytes)")
                  return audio_file
              else:
-                 raise RuntimeError(f"Generated audio file is too small or empty: {audio_file.stat().st_size} bytes")
+                 raise RuntimeError("Audio file too small or empty")

          except Exception as e:
              logger.error(f"Audio synthesis failed: {e}", exc_info=True)
-             raise RuntimeError(f"Failed to generate podcast audio: {str(e)}")
+             raise RuntimeError(f"Failed to generate audio: {str(e)}")

      def _create_metadata(
          self,
@@ -594,28 +652,22 @@ SUMMARY:
          style: str
      ) -> PodcastMetadata:
          """Create podcast metadata"""
-         # Auto-generate title
          title = f"Podcast: {analysis.topics[0] if analysis.topics else 'Document Discussion'}"
-
-         # Create description
-         description = f"A {style} podcast discussing insights from {len(document_ids)} document(s)."
-
-         # Calculate file size
+         description = f"A {style} podcast discussing: {', '.join(analysis.source_documents)}"
          file_size_mb = audio_path.stat().st_size / (1024 * 1024) if audio_path.exists() else 0

-         # Estimate costs
-         llm_cost = (script.word_count / 1000) * 0.01 # Rough estimate
-         tts_cost = (script.word_count * 5 / 1000) * 0.30 # Rough estimate
+         llm_cost = (script.word_count / 1000) * 0.01
+         tts_cost = (script.word_count * 5 / 1000) * 0.30

          return PodcastMetadata(
              podcast_id=podcast_id,
              title=title,
              description=description,
-             source_documents=document_ids,
+             source_documents=analysis.source_documents,
              style=style,
              duration_seconds=script.total_duration_estimate,
              file_size_mb=file_size_mb,
-             voices={"host1": list(voices)[0] if len(voices) > 0 else "Rachel",
+             voices={"host1": list(voices)[0] if len(voices) > 0 else "Rachel",
                      "host2": list(voices)[1] if len(voices) > 1 else "Adam"},
              generated_at=datetime.now().isoformat(),
              generation_cost={"llm_cost": llm_cost, "tts_cost": tts_cost, "total": llm_cost + tts_cost},
@@ -625,26 +677,21 @@ SUMMARY:
      def _save_metadata(self, metadata: PodcastMetadata):
          """Save metadata to database"""
          try:
-             # Load existing metadata
+             import json
              existing = json.loads(self.metadata_file.read_text())
-
-             # Add new metadata
              existing.append(asdict(metadata))
-
-             # Save back
              self.metadata_file.write_text(json.dumps(existing, indent=2))
-
-             logger.info(f"Metadata saved for podcast: {metadata.podcast_id}")
-
+             logger.info(f"Metadata saved: {metadata.podcast_id}")
          except Exception as e:
              logger.error(f"Failed to save metadata: {e}")

      def list_podcasts(self, limit: int = 10) -> List[PodcastMetadata]:
          """List generated podcasts"""
          try:
+             import json
              data = json.loads(self.metadata_file.read_text())
              podcasts = [PodcastMetadata(**item) for item in data[-limit:]]
-             return list(reversed(podcasts)) # Most recent first
+             return list(reversed(podcasts))
          except Exception as e:
              logger.error(f"Failed to list podcasts: {e}")
              return []
@@ -652,6 +699,7 @@ SUMMARY:
      def get_podcast(self, podcast_id: str) -> Optional[PodcastMetadata]:
          """Get specific podcast metadata"""
          try:
+             import json
              data = json.loads(self.metadata_file.read_text())
              for item in data:
                  if item.get('podcast_id') == podcast_id:
@@ -659,5 +707,4 @@ SUMMARY:
              return None
          except Exception as e:
              logger.error(f"Failed to get podcast: {e}")
-         return None
-
+             return None
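For reference, a minimal runnable sketch (not part of the commit) of how the section-based parsers introduced above consume the structured analysis response. The sample text and the extract_section helper are illustrative stand-ins for _extract_insights / _extract_topics, assuming the LLM answers in the requested KEY INSIGHTS / TOPICS / COMPLEXITY / SUMMARY layout:

import re

# Sample LLM response in the format requested by analysis_prompt above.
sample = """KEY INSIGHTS:
1. Vector indexes trade recall for latency.
2. Chunk size strongly affects retrieval quality.

TOPICS:
- Retrieval
- Indexing

COMPLEXITY: intermediate

SUMMARY:
A short overview of retrieval-augmented generation."""

def extract_section(text, header, stops):
    """Collect lines between `header` and the next stop header, stripping
    list markers: the same shape as the parsers added in this commit."""
    items, active = [], False
    for raw in text.split("\n"):
        line = raw.strip()
        if header in line.upper():
            active = True
            continue
        if line.upper().startswith(stops):
            active = False
        if active and line:
            items.append(re.sub(r'^\d+\.|\-|\*|•', '', line).strip())
    return items

print(extract_section(sample, "KEY INSIGHTS:", ("TOPICS:", "COMPLEXITY:", "SUMMARY:")))
# ['Vector indexes trade recall for latency.', 'Chunk size strongly affects retrieval quality.']
print(extract_section(sample, "TOPICS:", ("KEY INSIGHTS:", "COMPLEXITY:", "SUMMARY:")))
# ['Retrieval', 'Indexing']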
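And a second sketch of the per-line voice dispatch that replaces the old single-voice synthesis. Here synth() is a hypothetical stand-in for the ElevenLabs text_to_speech.convert call, so the byte strings are placeholders, but the dispatch-and-concatenate flow mirrors the rewritten synthesize_audio loop:

from dataclasses import dataclass

@dataclass
class Line:
    speaker: str
    text: str

def synth(voice_id: str, text: str) -> bytes:
    # Stand-in for the streaming TTS call; returns fake audio bytes.
    return f"[{voice_id}|{text}]".encode()

voice_map = {"HOST1": "voice-rachel", "HOST2": "voice-adam"}  # resolved voice IDs (hypothetical)
dialogue = [Line("HOST1", "Welcome!"), Line("HOST2", "Glad to be here.")]

# Each line is synthesized with its speaker's voice, and the per-line chunks
# are concatenated in order, matching the new per-line loop above.
audio = b"".join(
    synth(voice_map.get(line.speaker, voice_map["HOST1"]), line.text)
    for line in dialogue
)
print(audio)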