"""
Base classes for context chunking components.
"""
|
|
| from abc import ABC, abstractmethod |
| from typing import List, Dict, Any, Optional |
|
|
class Chunk:
    """Representation of a text chunk with metadata.

    Attributes:
        content: The text content of the chunk.
        chunk_id: Identifier for the chunk.
        document_id: ID of the source document, or None when not given.
        metadata: Metadata dict for the chunk. A fresh empty dict is used
            when the caller passes None (or any other falsy value).
        embedding: Always initialised to None here; nothing in this class
            assigns it afterwards.
    """

    def __init__(
        self,
        content: str,
        chunk_id: str,
        document_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        """
        Initialize a chunk.

        Args:
            content: The text content of the chunk
            chunk_id: Unique identifier for the chunk
            document_id: Optional ID of the source document
            metadata: Optional metadata for the chunk
        """
        self.content = content
        self.chunk_id = chunk_id
        self.document_id = document_id
        # `or {}` gives each chunk its own dict when no metadata is supplied,
        # so instances never share a mutable default.
        self.metadata = metadata or {}
        # Placeholder slot; left as None until some other component fills it.
        self.embedding = None

    def __repr__(self) -> str:
        """Return a short identifying representation (content is omitted)."""
        return (
            f"{type(self).__name__}("
            f"chunk_id={self.chunk_id!r}, document_id={self.document_id!r})"
        )
|
|
class BaseChunker(ABC):
    """Base class for content chunking components.

    Concrete chunkers subclass this and override :meth:`chunk`. Because
    ``chunk`` is declared with ``@abstractmethod`` on an ``ABC``, a subclass
    that does not override it cannot be instantiated.
    """

    @abstractmethod
    def chunk(
        self,
        content: str,
        metadata: Optional[Dict[str, Any]] = None,
        document_id: Optional[str] = None,
    ) -> List[Chunk]:
        """
        Split content into chunks.

        Args:
            content: Content to be chunked
            metadata: Optional metadata to associate with chunks
            document_id: Optional document ID to associate with chunks

        Returns:
            chunks: List of Chunk objects
        """
|
|