Documentation
FilesResource
Access via client.files
Sync + async parity
Every method below is also available on SyncScopix.files with identical signatures (minus the async / await). The sync wrappers are generated from the async resource, so behavior, parameter names, and return types match exactly.
Upload
upload()
Unified single-file upload. Auto-selects the best strategy by file size: streaming (< 50 MB), presigned single-shot PUT (< 5 GB), or multipart (>= 5 GB). Force a strategy via strategy=.
async def upload(
    file: Union[str, Path, bytes],
    *,
    filename: Optional[str] = None,      # required when file is bytes
    content_type: Optional[str] = None,  # inferred from filename if omitted
    strategy: Literal["auto", "streaming", "presigned", "multipart"] = "auto",
    title: Optional[str] = None,
    tags: Optional[list[str]] = None,
    folder_id: Optional[str] = None,
    project_id: Optional[str] = None,
    content_category: Union[ContentCategory, str, None] = None,
    storage_target: Union[StorageTarget, str] = StorageTarget.DEFAULT,
    skip_duplicates: bool = False,
    auto_describe: bool = True,
    custom_schema_id: Optional[str] = None,
    compliance_type: Optional[str] = None,
    compliance_standard: Optional[str] = None,
    compliance_image_type: str = "main",
    # presigned/multipart only
    file_hash: Optional[str] = None,
    idempotency_key: Optional[str] = None,
    part_size: int = 8 * 1024 * 1024,  # S3 min 5 MB
    # progress / retry
    on_progress: Optional[Callable[[UploadProgressEvent], None]] = None,
    max_concurrency: Optional[int] = None,
    max_retries: int = 3,
) -> UploadResult

upload_streaming()
Force a streaming (single multipart POST) upload. Server rejects files > 100 MB.
async def upload_streaming(
    file: Union[str, Path, bytes],
    **kwargs,  # same keyword args as upload()
) -> UploadResult

upload_presigned()
Force a single-shot presigned PUT to S3. Suitable for files up to ~5 GB.
async def upload_presigned(
    file: Union[str, Path, bytes],
    **kwargs,  # same keyword args as upload()
) -> UploadResult

upload_multipart()
Force an S3 multipart upload. Required for files > 5 GB; recommended for slow/flaky networks because each part is retried independently.
async def upload_multipart(
    file: Union[str, Path, bytes],
    **kwargs,  # same keyword args as upload()
) -> UploadResult

upload_batch()
Upload many files in a single server-orchestrated streaming batch. Small batches return immediately; larger ones return a session_id for progress tracking. All files go through streaming — use individual upload() calls for very large files.
async def upload_batch(
    files: list[Union[str, Path, bytes]],
    *,
    filenames: Optional[list[str]] = None,
    title: Optional[str] = None,
    tags: Optional[list[str]] = None,
    folder_id: Optional[str] = None,
    project_id: Optional[str] = None,
    content_category: Union[ContentCategory, str, None] = None,
    storage_target: Union[StorageTarget, str] = StorageTarget.DEFAULT,
    skip_duplicates: bool = False,
    auto_describe: bool = True,
    custom_schema_id: Optional[str] = None,
    compliance_type: Optional[str] = None,
    compliance_standard: Optional[str] = None,
    compliance_image_type: str = "main",
) -> BatchUploadResults

check_quota()
Pre-check whether the tenant can accept file_count new uploads.
async def check_quota(file_count: int = 1) -> QuotaInfo

check_duplicates()
Return which SHA-256 hashes already exist in the tenant's file store. Max 250 hashes per call.
async def check_duplicates(hashes: list[str]) -> DuplicateCheckResult

get_upload_intent_status()
Inspect the server state of an in-flight presigned / multipart upload.
async def get_upload_intent_status(upload_id: str) -> dict[str, Any]

abort_upload()
Abort an in-flight upload. Raises UploadError if the server reports the intent was already in a terminal state.
async def abort_upload(upload_id: str, *, reason: str = "") -> None

List & Read
list()
List files with optional filtering and pagination.
async def list(
    *,
    search: Optional[str] = None,
    search_mode: str = "all",  # "all" | "metadata" | "visible_text"
    tags: Optional[list[str]] = None,
    date_from: Optional[datetime] = None,
    date_to: Optional[datetime] = None,
    has_description: Optional[bool] = None,
    ids: Optional[list[str]] = None,  # max 500
    media_types: Optional[list[str]] = None,  # "image" | "video" | "document" | "link"
    folder_id: Optional[str] = None,
    project_id: Optional[str] = None,
    compliance_status: Optional[str] = None,
    limit: int = 20,  # 1-100
    offset: int = 0,
    sort_by: str = "content_created_at",  # "created_at" | "content_created_at" | "title" | "size_bytes"
    sort_order: str = "desc",  # "asc" | "desc"
) -> FileList

list_all()
Auto-paginating async iterator that yields every file matching the filter.
async def list_all(
    *,
    search: Optional[str] = None,
    search_mode: str = "all",
    tags: Optional[list[str]] = None,
    date_from: Optional[datetime] = None,
    date_to: Optional[datetime] = None,
    has_description: Optional[bool] = None,
    media_types: Optional[list[str]] = None,
    folder_id: Optional[str] = None,
    project_id: Optional[str] = None,
    sort_by: str = "content_created_at",
    sort_order: str = "desc",
    page_size: int = 50,
) -> AsyncIterator[UserFileSummary]

get()
Get full details for a single file. format="markdown" is available for documents.
async def get(file_id: str, *, format: Optional[str] = None) -> UserFileDetails

get_many()
Fetch full details for multiple files (max 500 ids).
async def get_many(ids: list[str]) -> list[UserFileDetails]

Update & Delete
update()
Update metadata. Pass user_description=None to reset to the AI-generated description.
async def update(
    file_id: str,
    *,
    title: Optional[str] = None,  # max 255 chars
    tags: Optional[list[str]] = None,  # max 40 tags, each max 50 chars
    user_description: Optional[str] = ...,  # max 10000 chars; pass None to reset
) -> UpdateFileResult

delete()
Soft-delete a file (recoverable within 30 days).
async def delete(file_id: str) -> DeleteFileResult

batch_delete()
Delete up to 100 files in one call.
async def batch_delete(file_ids: list[str]) -> BatchDeleteFilesResponse

bulk_delete()
Delete any number of files. Dedupes silently and chunks into batches of 100 internally.
async def bulk_delete(file_ids: list[str]) -> BatchDeleteFilesResponse

Download
download()
Download the original file as bytes.
async def download(file_id: str) -> bytes

download_url()
Return a short-lived presigned download URL (the 307 redirect target).
async def download_url(file_id: str) -> str

download_to_file()
Download a file and write it to disk. Returns the destination path.
async def download_to_file(file_id: str, path: Union[str, Path]) -> Path

Image-only operations
get_variant()
Redirect URL for a named image variant. Returns 400 on non-image files.
async def get_variant(
    file_id: str,
    variant_type: str = "medium_750",
    # "original" | "tiny_64" | "small_256" | "medium_512" | "medium_750"
    # | "large_1024" | "legend_annotated" | "architectural_design_annotated"
) -> str

get_similar()
Find images similar to this one by embedding. Image-only.
async def get_similar(
    file_id: str,
    *,
    limit: int = 20,  # 1-50
) -> dict[str, Any]

trigger_variants()
Manually (re)queue image variant generation. Response includes task_id, current_status, and skipped_duplicate (true if an in-flight task was reused).
async def trigger_variants(file_id: str) -> dict[str, Any]

review_extraction()
Record confirmed / rejected review status per extraction item, plus optional field edits. Must supply at least one of item_reviews or field_edits.
async def review_extraction(
    file_id: str,
    domain_name: str,
    *,
    item_reviews: Optional[dict[str, str]] = None,  # {item_key: "confirmed" | "rejected"}
    field_edits: Optional[dict[str, Any]] = None,
) -> ExtractionReviewResult

Document-only operations
get_text()
Full extracted text for a document.
async def get_text(file_id: str) -> dict[str, Any]

get_chunks()
Semantic chunks for a document. Pass include_embeddings=True to receive the vector embedding alongside each chunk.
async def get_chunks(
    file_id: str,
    *,
    include_embeddings: bool = False,
) -> DocumentChunksResponse

get_digitization()
Per-page OCR elements and bounding boxes for a document.
async def get_digitization(file_id: str) -> dict[str, Any]

get_digitization_page()
OCR output for a specific page.
async def get_digitization_page(
    file_id: str,
    page_number: int,
) -> dict[str, Any]

get_digitization_status()
Lightweight progress check for document digitization.
async def get_digitization_status(file_id: str) -> dict[str, Any]

Processing status
get_processing_status()
Unified status: text extraction for documents; variants, describe, and color for images.
async def get_processing_status(file_id: str) -> DocumentStatusResult

get_upload_status()
Unified upload + processing state for a single file (by image id).
async def get_upload_status(image_id: str) -> dict[str, Any]

Search, analyze, export
search()
Semantic search over document chunks. Documents only.
async def search(
    query: str,
    *,
    limit: int = 20,  # 1-100
    similarity_threshold: float = 0.5,
    document_ids: Optional[list[str]] = None,
) -> DocumentSearchResponse

analyze()
Synchronous document analyze: upload + wait up to timeout seconds for the full result. 10 MB limit — use upload() for larger files.
async def analyze(
    file: Union[str, Path, bytes],
    *,
    filename: Optional[str] = None,
    skip_duplicates: bool = False,
    timeout: int = 60,  # 5-120 seconds
    folder_id: Optional[str] = None,
    project_id: Optional[str] = None,
) -> dict[str, Any]

analyze_async()
Asynchronous document analyze — returns job_id immediately. 10 MB limit.
async def analyze_async(
    file: Union[str, Path, bytes],
    *,
    filename: Optional[str] = None,
    skip_duplicates: bool = False,
    folder_id: Optional[str] = None,
    project_id: Optional[str] = None,
) -> dict[str, Any]

get_export_columns()
List the columns available for file-list exports, grouped by category.
async def get_export_columns() -> ExportableColumnsResponse

export()
Export filtered file metadata as CSV, XLSX, or Google Sheets. Columns can be either ExportColumnSpec instances or plain dicts.
async def export(
    *,
    format: str,  # "csv" | "xlsx" | "google_sheets"
    columns: list[Any],  # ExportColumnSpec | dict
    file_ids: Optional[list[str]] = None,
    folder_id: Optional[str] = None,
    include_subfolders: bool = False,
    flatten_tags: bool = True,
    sheet_name: str = "Files",  # max 31 chars
    google_sheets_title: Optional[str] = None,  # max 200 chars
    connection_id: Optional[str] = None,  # required when format="google_sheets"
) -> FileExportResult

Batch session tracking
get_session_status()
Progress + status for a batch upload session.
async def get_session_status(session_id: str) -> UploadSessionStatus

get_session_results()
Paginated per-file results from a batch session.
async def get_session_results(
    session_id: str,
    *,
    limit: int = 50,  # 1-100
    offset: int = 0,
) -> UploadSessionResults

cancel_session()
Cancel an in-progress batch upload session.
async def cancel_session(session_id: str) -> dict[str, Any]

wait_for_session()
Poll a session until it reaches a terminal state (completed, failed, cancelled, expired) or timeout elapses.
async def wait_for_session(
    session_id: str,
    *,
    timeout: Optional[float] = None,  # defaults to config.polling_timeout
    poll_interval: Optional[float] = None,  # defaults to config.polling_interval
    on_progress: Optional[Callable[[UploadSessionStatus], None]] = None,
) -> UploadSessionStatus

list_stuck_uploads()
Operator-visibility endpoint — list uploads that have been stuck beyond stuck_minutes.
async def list_stuck_uploads(
    *,
    stuck_minutes: int = 30,  # >= 1
    limit: int = 100,  # 1-500
) -> dict[str, Any]

Health
health()
Health probe for the unified file service.
async def health() -> dict[str, Any]
