Documentation

Document Types

Types for document upload, processing, and search operations

DocumentUploadResult

Result of a single document upload; elements of the BatchDocumentUploadResults returned by client.files.upload()

python
@dataclass(frozen=True)
class DocumentUploadResult:
document_id: str # Unique document identifier
filename: str # Original filename
content_type: str # MIME type
size_bytes: int # File size in bytes
text_extraction_status: DocumentProcessingStatus = DocumentProcessingStatus.PENDING # Processing status
embedding_status: Optional[str] = None # Search indexing status
page_count: Optional[int] = None # Pages (for PDFs)
chunk_count: Optional[int] = None # Text chunks created
extracted_text: Optional[str] = None # Full extracted text (if available)
created_at: Optional[datetime] = None
processing_error: Optional[str] = None # Error message if failed
processing_error_type: Optional[str] = None # Error classification
processing_is_retryable: Optional[bool] = None # Whether retry may succeed
# Properties
is_failed: bool # text_extraction_status == FAILED
is_completed: bool # text_extraction_status == COMPLETED
is_pending: bool # text_extraction_status in (PENDING, PROCESSING)
def to_dict(self, exclude_none: bool = True) -> dict[str, Any]: ...

BatchDocumentUploadResults

List of DocumentUploadResult with batch helpers (returned by client.files.upload())

python
class BatchDocumentUploadResults(list):
# Properties
has_failures: bool # True if any processing failed
failed_count: int # Number of failed results
succeeded_count: int # Number of successful results
pending_count: int # Number still pending
# Methods
def failed(self) -> list[DocumentUploadResult]: ...
def succeeded(self) -> list[DocumentUploadResult]: ...
def pending(self) -> list[DocumentUploadResult]: ...
def retryable(self) -> list[DocumentUploadResult]: ...
def raise_on_failures(self) -> BatchDocumentUploadResults: ...
def summary(self) -> str: ... # e.g. "3 succeeded, 1 failed (1 retryable)"

DocumentItem

Document summary in list responses

python
@dataclass(frozen=True)
class DocumentItem:
id: str # Unique document identifier
filename: str # Original uploaded filename
content_type: str # MIME type
size_bytes: int # File size
text_extraction_status: str # pending | processing | completed | failed
title: Optional[str] = None
page_count: Optional[int] = None # Pages (for PDFs)
embedding_status: Optional[str] = None # Search indexing status
created_at: Optional[datetime] = None
updated_at: Optional[datetime] = None
folder_id: Optional[str] = None
tags: Optional[list[str]] = None

DocumentDetails

Full document details (from get response)

python
@dataclass(frozen=True)
class DocumentDetails:
id: str
filename: str
content_type: str
size_bytes: int
text_extraction_status: str
title: Optional[str] = None
page_count: Optional[int] = None
embedding_status: Optional[str] = None # Search indexing status
chunk_count: Optional[int] = None # Number of text chunks
extracted_text: Optional[str] = None # Full extracted text
created_at: Optional[datetime] = None
updated_at: Optional[datetime] = None
folder_id: Optional[str] = None
tags: Optional[list[str]] = None
download_url: Optional[str] = None # Download URL

DocumentList

Paginated list of documents

python
@dataclass(frozen=True)
class DocumentList:
items: list[DocumentItem] # Document summaries
total_count: int # Total matching query
limit: int # Maximum items returned per page
offset: int # Number of items skipped
has_more: bool # More pages available

DocumentChunk

Individual text chunk from a document

python
@dataclass(frozen=True)
class DocumentChunk:
id: str # Chunk identifier
document_id: str # Parent document ID
content: str # Text content
chunk_index: int # Position in document (0-based)
page_numbers: Optional[list[int]] = None # Pages this chunk spans
token_count: Optional[int] = None
embedding: Optional[list[float]] = None # Vector embedding (if requested)

DocumentChunksResponse

Response containing all chunks for a document

python
@dataclass(frozen=True)
class DocumentChunksResponse:
document_id: str # Parent document ID
chunks: list[DocumentChunk] # All chunks
total_chunks: int # Total number of chunks

DocumentSearchResult

Single search result (chunk with relevance score)

python
@dataclass(frozen=True)
class DocumentSearchResult:
chunk_id: str # Chunk identifier
document_id: str # Parent document ID
document_filename: str # Filename of the document
content: str # Matching text content
score: float # Similarity score (0-1)
page_numbers: Optional[list[int]] = None # Pages this chunk spans
chunk_index: Optional[int] = None # Position in document

DocumentSearchResponse

Complete search response

python
@dataclass(frozen=True)
class DocumentSearchResponse:
results: list[DocumentSearchResult] # Matching chunks
total_count: int # Total matches
search_time_ms: int # Search time in ms
query: str # The search query used

DocumentStatusResult

Document processing status (from get_status() or wait_for_processing())

python
@dataclass(frozen=True)
class DocumentStatusResult:
document_id: str
text_extraction_status: str # pending | processing | completed | failed
embedding_status: Optional[str] = None # Search indexing status
page_count: Optional[int] = None
chunk_count: Optional[int] = None
error_message: Optional[str] = None
# Properties
is_completed: bool # text_extraction_status == "completed"
is_failed: bool # text_extraction_status == "failed"
is_processing: bool # text_extraction_status in ("pending", "processing")

DocumentQuotaCheck

Quota check response for document uploads

python
@dataclass(frozen=True)
class DocumentQuotaCheck:
can_proceed: bool # Whether upload can proceed
requested: int # Number of files requested to upload
available: int # Total available upload credits remaining
monthly_limit: int # Monthly quota limit (-1 for unlimited)
current_usage: int # Current usage count this period
prepaid_credits: int = 0 # Additional purchased credits
message: str = ""
# Properties
documents_remaining: int # Number of documents that can still be uploaded

DocumentDeleteResult

Single document deletion result

python
@dataclass(frozen=True)
class DocumentDeleteResult:
id: str # Document identifier
status: str # Deletion status (deleted/failed)
message: str = "" # Additional information
deleted_at: Optional[datetime] = None # Deletion timestamp

DocumentBatchDeleteResponse

Response for batch document deletion

python
@dataclass(frozen=True)
class DocumentBatchDeleteResponse:
deleted: list[DocumentDeleteResult] # Successfully deleted
failed: list[DocumentDeleteResult] # Failed to delete
summary: dict[str, int] # {total, deleted, failed}

DocumentProcessingStatus

Enum for document processing status

python
class DocumentProcessingStatus(str, Enum):
PENDING = "pending" # Uploaded, not yet started
PROCESSING = "processing" # Text extraction in progress
COMPLETED = "completed" # Processing completed
FAILED = "failed" # Processing failed

DocumentProcessingErrorType

Classification of document processing errors for retry strategies

python
class DocumentProcessingErrorType(str, Enum):
TIMEOUT = "timeout" # Processing timed out (retryable)
EXTRACTION_ERROR = "extraction_error" # Text extraction failed (may be retryable)
UNSUPPORTED_FORMAT = "unsupported_format" # Format not supported (permanent)
CORRUPT_FILE = "corrupt_file" # File is corrupted (permanent)
PASSWORD_PROTECTED = "password_protected" # Requires password (permanent)
TOO_LARGE = "too_large" # Exceeds size limits (permanent)
EMBEDDING_ERROR = "embedding_error" # Search indexing failed (retryable)
RESOURCE_LIMIT = "resource_limit" # Quota exceeded (permanent)
UNKNOWN = "unknown" # Unclassified (not retryable)

DocumentProcessingFailure

Details about a failed document processing operation with error classification

python
@dataclass(frozen=True)
class DocumentProcessingFailure:
document_id: str # ID of the document that failed
error_message: str # Human-readable error message
error_type: DocumentProcessingErrorType # Classification of the error
is_retryable: bool # Whether the operation can be retried
filename: Optional[str] = None # Original filename
# Static and class methods
@staticmethod
def classify_error(error_msg: str) -> tuple[DocumentProcessingErrorType, bool]: ...
@classmethod
def from_error_message(cls, document_id: str, error_msg: str, filename: Optional[str] = None) -> DocumentProcessingFailure: ...