| """ |
| Example: DocumentAgent with ReAct-style Processing |
| |
| Demonstrates: |
| 1. Loading and processing documents |
| 2. Field extraction with evidence |
| 3. Document classification |
| 4. Question answering with grounding |
| """ |
|
|
| import asyncio |
| from pathlib import Path |
| from loguru import logger |
|
|
| |
| from src.agents.document_agent import ( |
| DocumentAgent, |
| AgentConfig, |
| ) |
| from src.document.schemas.extraction import ( |
| ExtractionSchema, |
| FieldDefinition, |
| ) |
|
|
|
|
async def example_basic_agent():
    """Create a configured agent, load the sample PDF, and print its stats.

    The agent is built from an explicit ``AgentConfig`` (model name,
    iteration cap, temperature). If ``./data/sample.pdf`` does not exist,
    a hint is printed and nothing is loaded. Otherwise the document is
    loaded and its filename, page count, and chunk count are printed.
    """
    banner = "=" * 50
    print(banner, "Basic DocumentAgent Usage", banner, sep="\n")

    # Build the agent first, exactly as a caller with custom settings would.
    settings = AgentConfig(
        default_model="llama3.2:3b",
        max_iterations=10,
        temperature=0.1,
    )
    doc_agent = DocumentAgent(settings)

    pdf_path = Path("./data/sample.pdf")
    if pdf_path.exists():
        print(f"\nLoading document: {pdf_path}")
        await doc_agent.load_document(str(pdf_path))

        # Summarize what the agent now holds for the loaded document.
        loaded = doc_agent.document
        print(f"Document loaded: {loaded.metadata.filename}")
        print(f"Pages: {loaded.metadata.num_pages}")
        print(f"Chunks: {len(loaded.chunks)}")
    else:
        # Without the sample file there is nothing to demonstrate.
        print(f"Sample document not found: {pdf_path}")
        print("Create a sample PDF at ./data/sample.pdf")
|
|
|
|
async def example_field_extraction():
    """Extract a fixed set of fields from the sample PDF and print them.

    A four-field ``ExtractionSchema`` (title, author, date, summary; all
    typed ``"string"``) is passed to ``agent.extract_fields``. The
    extracted fields and the confidence are printed, followed by at most
    three evidence entries with snippets cut to 80 characters. Prints a
    notice and returns early when ``./data/sample.pdf`` is missing.
    """
    rule = "=" * 50
    print("\n" + rule, "Field Extraction with Evidence", rule, sep="\n")

    extractor = DocumentAgent()

    pdf_path = Path("./data/sample.pdf")
    if not pdf_path.exists():
        print("Sample document not found")
        return

    await extractor.load_document(str(pdf_path))

    # (name, description, required) for every field in the schema.
    field_specs = (
        ("title", "Document title", True),
        ("author", "Document author or organization", False),
        ("date", "Document date", False),
        ("summary", "Brief summary of document content", True),
    )
    schema = ExtractionSchema(
        name="document_info",
        description="Extract key document information",
        fields=[
            FieldDefinition(
                name=field_name,
                field_type="string",
                description=field_description,
                required=is_required,
            )
            for field_name, field_description, is_required in field_specs
        ],
    )

    print("\nExtracting fields...")
    extraction = await extractor.extract_fields(schema)

    print("\nExtracted Fields:")
    for field_name, extracted in extraction.fields.items():
        print(f" {field_name}: {extracted}")

    print(f"\nConfidence: {extraction.confidence:.2f}")

    sources = extraction.evidence
    if sources:
        print(f"\nEvidence ({len(sources)} sources):")
        # Only the first three sources are shown to keep the output short.
        for source in sources[:3]:
            print(f" - Page {source.page + 1}: {source.snippet[:80]}...")
|
|
|
|
async def example_classification():
    """Classify the sample PDF and print the outcome.

    Prints the document type, the confidence (two decimals), and the
    reasoning returned by ``agent.classify()``; when the classification
    carries metadata, each key/value pair is printed as well. Prints a
    notice and returns early when ``./data/sample.pdf`` is missing.
    """
    rule = "=" * 50
    print("\n" + rule, "Document Classification", rule, sep="\n")

    classifier = DocumentAgent()

    pdf_path = Path("./data/sample.pdf")
    if not pdf_path.exists():
        print("Sample document not found")
        return

    await classifier.load_document(str(pdf_path))

    print("\nClassifying document...")
    verdict = await classifier.classify()

    print(f"\nDocument Type: {verdict.document_type.value}")
    print(f"Confidence: {verdict.confidence:.2f}")
    print(f"Reasoning: {verdict.reasoning}")

    # The metadata section is optional; skip it when nothing was attached.
    extras = verdict.metadata
    if not extras:
        return

    print("\nAdditional metadata:")
    for meta_key, meta_value in extras.items():
        print(f" {meta_key}: {meta_value}")
|
|
|
|
async def example_question_answering():
    """Ask three fixed questions about the sample PDF and print the answers.

    For each question, ``agent.answer_question`` is awaited and its answer
    printed; when evidence is returned, the first two entries are listed
    with page, source type, and a snippet cut to 60 characters. Prints a
    notice and returns early when ``./data/sample.pdf`` is missing.
    """
    rule = "=" * 50
    print("\n" + rule, "Question Answering with Evidence", rule, sep="\n")

    qa_agent = DocumentAgent()

    pdf_path = Path("./data/sample.pdf")
    if not pdf_path.exists():
        print("Sample document not found")
        return

    await qa_agent.load_document(str(pdf_path))

    prompts = (
        "What is this document about?",
        "What are the main findings or conclusions?",
        "Are there any tables or figures? What do they show?",
    )
    divider = "-" * 40

    for prompt in prompts:
        print(f"\nQ: {prompt}")
        print(divider)

        reply, supporting = await qa_agent.answer_question(prompt)
        print(f"A: {reply}")

        # Nothing to cite for this answer; move on to the next question.
        if not supporting:
            continue

        print("\nEvidence:")
        for item in supporting[:2]:
            print(f" - Page {item.page + 1} ({item.source_type}): {item.snippet[:60]}...")
|
|
|
|
async def example_react_task():
    """Run a multi-part analysis task through ``agent.run`` and print the trace.

    Prints the task text, the result, a trace summary (step count, tools
    used, total time), and the first five steps with their thought and
    observation cut to 100 characters each. Prints a notice and returns
    early when ``./data/sample.pdf`` is missing.
    """
    rule = "=" * 50
    print("\n" + rule, "ReAct-style Task Execution", rule, sep="\n")

    react_agent = DocumentAgent()

    pdf_path = Path("./data/sample.pdf")
    if not pdf_path.exists():
        print("Sample document not found")
        return

    await react_agent.load_document(str(pdf_path))

    # The prompt starts and ends with a newline, like the original literal.
    task = (
        "\n"
        "Analyze this document and provide:\n"
        "1. A brief summary of the content\n"
        "2. The document type and purpose\n"
        "3. Any key data points or figures mentioned\n"
        "4. Your confidence in the analysis\n"
    )

    print(f"\nTask: {task}")
    print("-" * 40)

    outcome, run_trace = await react_agent.run(task)

    print(f"\nResult:\n{outcome}")

    print("\n--- Agent Trace ---")
    print(f"Steps: {len(run_trace.steps)}")
    print(f"Tools used: {run_trace.tools_used}")
    print(f"Total time: {run_trace.total_time:.2f}s")

    print("\nReasoning trace:")
    # Show only the first five steps, numbered from 1.
    for number, entry in enumerate(run_trace.steps[:5], start=1):
        print(f"\n[Step {number}] {entry.action}")
        if entry.thought:
            print(f" Thought: {entry.thought[:100]}...")
        if entry.observation:
            print(f" Observation: {entry.observation[:100]}...")
|
|
|
|
async def main():
    """Await every example coroutine once, in a fixed order."""
    demos = (
        example_basic_agent,
        example_field_extraction,
        example_classification,
        example_question_answering,
        example_react_task,
    )
    # Run strictly one after another so the printed sections stay in order.
    for demo in demos:
        await demo()
|
|
|
|
if __name__ == "__main__":
    # Run the examples only when this file is executed as a script,
    # not when it is imported as a module.
    asyncio.run(main())
|
|