Documentation

FilesResource

Access via client.files

Sync + async parity

Every method below is also available on SyncScopix.files with identical signatures (minus the async / await). The sync wrappers are generated from the async resource, so behavior, parameter names, and return types match exactly.

Upload

upload()

Unified single-file upload. Auto-selects the best strategy by file size: streaming (under 50 MB), presigned single-shot PUT (50 MB up to 5 GB), or multipart (5 GB and above). Force a specific strategy via strategy=.

python
async def upload(
file: Union[str, Path, bytes],
*,
filename: Optional[str] = None, # required when file is bytes
content_type: Optional[str] = None, # inferred from filename if omitted
strategy: Literal["auto", "streaming", "presigned", "multipart"] = "auto",
title: Optional[str] = None,
tags: Optional[list[str]] = None,
folder_id: Optional[str] = None,
project_id: Optional[str] = None,
content_category: Union[ContentCategory, str, None] = None,
storage_target: Union[StorageTarget, str] = StorageTarget.DEFAULT,
skip_duplicates: bool = False,
auto_describe: bool = True,
custom_schema_id: Optional[str] = None,
compliance_type: Optional[str] = None,
compliance_standard: Optional[str] = None,
compliance_image_type: str = "main",
# presigned/multipart only
file_hash: Optional[str] = None,
idempotency_key: Optional[str] = None,
part_size: int = 8 * 1024 * 1024, # S3 min 5 MB
# progress / retry
on_progress: Optional[Callable[[UploadProgressEvent], None]] = None,
max_concurrency: Optional[int] = None,
max_retries: int = 3,
) -> UploadResult

upload_streaming()

Force a streaming (single multipart POST) upload. Server rejects files > 100 MB.

python
async def upload_streaming(
file: Union[str, Path, bytes],
**kwargs, # same keyword args as upload()
) -> UploadResult

upload_presigned()

Force a single-shot presigned PUT to S3. Suitable for files up to ~5 GB.

python
async def upload_presigned(
file: Union[str, Path, bytes],
**kwargs, # same keyword args as upload()
) -> UploadResult

upload_multipart()

Force an S3 multipart upload. Required for files > 5 GB; recommended for slow/flaky networks because each part is retried independently.

python
async def upload_multipart(
file: Union[str, Path, bytes],
**kwargs, # same keyword args as upload()
) -> UploadResult

upload_batch()

Upload many files in a single server-orchestrated streaming batch. Small batches return immediately; larger ones return a session_id for progress tracking. All files go through streaming — use individual upload() calls for very large files.

python
async def upload_batch(
files: list[Union[str, Path, bytes]],
*,
filenames: Optional[list[str]] = None,
title: Optional[str] = None,
tags: Optional[list[str]] = None,
folder_id: Optional[str] = None,
project_id: Optional[str] = None,
content_category: Union[ContentCategory, str, None] = None,
storage_target: Union[StorageTarget, str] = StorageTarget.DEFAULT,
skip_duplicates: bool = False,
auto_describe: bool = True,
custom_schema_id: Optional[str] = None,
compliance_type: Optional[str] = None,
compliance_standard: Optional[str] = None,
compliance_image_type: str = "main",
) -> BatchUploadResults

check_quota()

Pre-check whether the tenant can accept file_count new uploads.

python
async def check_quota(file_count: int = 1) -> QuotaInfo

check_duplicates()

Return which SHA-256 hashes already exist in the tenant's file store. Max 250 hashes per call.

python
async def check_duplicates(hashes: list[str]) -> DuplicateCheckResult

get_upload_intent_status()

Inspect the server state of an in-flight presigned / multipart upload.

python
async def get_upload_intent_status(upload_id: str) -> dict[str, Any]

abort_upload()

Abort an in-flight upload. Raises UploadError if the server reports the intent was already in a terminal state.

python
async def abort_upload(upload_id: str, *, reason: str = "") -> None

List & Read

list()

List files with optional filtering and pagination.

python
async def list(
*,
search: Optional[str] = None,
search_mode: str = "all", # "all" | "metadata" | "visible_text"
tags: Optional[list[str]] = None,
date_from: Optional[datetime] = None,
date_to: Optional[datetime] = None,
has_description: Optional[bool] = None,
ids: Optional[list[str]] = None, # max 500
media_types: Optional[list[str]] = None, # "image" | "video" | "document" | "link"
folder_id: Optional[str] = None,
project_id: Optional[str] = None,
compliance_status: Optional[str] = None,
limit: int = 20, # 1-100
offset: int = 0,
sort_by: str = "content_created_at", # "created_at" | "content_created_at" | "title" | "size_bytes"
sort_order: str = "desc", # "asc" | "desc"
) -> FileList
Returns: FileList — each item is a UserFileSummary (use isinstance to narrow the type).

list_all()

Auto-paginating async iterator that yields every file matching the filter.

python
async def list_all(
*,
search: Optional[str] = None,
search_mode: str = "all",
tags: Optional[list[str]] = None,
date_from: Optional[datetime] = None,
date_to: Optional[datetime] = None,
has_description: Optional[bool] = None,
media_types: Optional[list[str]] = None,
folder_id: Optional[str] = None,
project_id: Optional[str] = None,
sort_by: str = "content_created_at",
sort_order: str = "desc",
page_size: int = 50,
) -> AsyncIterator[UserFileSummary]

get()

Get full details for a single file. format="markdown" is available for documents.

python
async def get(file_id: str, *, format: Optional[str] = None) -> UserFileDetails

get_many()

Fetch full details for multiple files (max 500 ids).

python
async def get_many(ids: list[str]) -> list[UserFileDetails]

Update & Delete

update()

Update metadata. Pass user_description=None to reset to the AI-generated description.

python
async def update(
file_id: str,
*,
title: Optional[str] = None, # max 255 chars
tags: Optional[list[str]] = None, # max 40 tags, each max 50 chars
user_description: Optional[str] = ..., # max 10000 chars; pass None to reset
) -> UpdateFileResult

delete()

Soft-delete a file (recoverable within 30 days).

python
async def delete(file_id: str) -> DeleteFileResult

batch_delete()

Delete up to 100 files in one call.

python
async def batch_delete(file_ids: list[str]) -> BatchDeleteFilesResponse

bulk_delete()

Delete any number of files. Dedupes silently and chunks into batches of 100 internally.

python
async def bulk_delete(file_ids: list[str]) -> BatchDeleteFilesResponse

Download

download()

Download the original file as bytes.

python
async def download(file_id: str) -> bytes

download_url()

Return a short-lived presigned download URL (the 307 redirect target).

python
async def download_url(file_id: str) -> str

download_to_file()

Download a file and write it to disk. Returns the destination path.

python
async def download_to_file(file_id: str, path: Union[str, Path]) -> Path

Image-only operations

get_variant()

Return the redirect URL for a named image variant. The server responds with 400 for non-image files.

python
async def get_variant(
file_id: str,
variant_type: str = "medium_750",
# "original" | "tiny_64" | "small_256" | "medium_512" | "medium_750"
# | "large_1024" | "legend_annotated" | "architectural_design_annotated"
) -> str

get_similar()

Find images visually similar to the given file, ranked by embedding similarity. Image-only.

python
async def get_similar(
file_id: str,
*,
limit: int = 20, # 1-50
) -> dict[str, Any]

trigger_variants()

Manually (re)queue image variant generation. Response includes task_id, current_status, and skipped_duplicate (true if an in-flight task was reused).

python
async def trigger_variants(file_id: str) -> dict[str, Any]

review_extraction()

Record confirmed / rejected review status per extraction item, plus optional field edits. Must supply at least one of item_reviews or field_edits.

python
async def review_extraction(
file_id: str,
domain_name: str,
*,
item_reviews: Optional[dict[str, str]] = None, # {item_key: "confirmed" | "rejected"}
field_edits: Optional[dict[str, Any]] = None,
) -> ExtractionReviewResult

Document-only operations

get_text()

Full extracted text for a document.

python
async def get_text(file_id: str) -> dict[str, Any]

get_chunks()

Semantic chunks for a document. Pass include_embeddings=True to receive the vector embedding alongside each chunk.

python
async def get_chunks(
file_id: str,
*,
include_embeddings: bool = False,
) -> DocumentChunksResponse

get_digitization()

Per-page OCR elements and bounding boxes for a document.

python
async def get_digitization(file_id: str) -> dict[str, Any]

get_digitization_page()

OCR output for a specific page.

python
async def get_digitization_page(
file_id: str,
page_number: int,
) -> dict[str, Any]

get_digitization_status()

Lightweight progress check for document digitization.

python
async def get_digitization_status(file_id: str) -> dict[str, Any]

Processing status

get_processing_status()

Unified processing status: text extraction for documents; variant generation, auto-description, and color analysis for images.

python
async def get_processing_status(file_id: str) -> DocumentStatusResult

get_upload_status()

Unified upload + processing state for a single file (by image id).

python
async def get_upload_status(image_id: str) -> dict[str, Any]

Search, analyze, export

search()

Semantic search over document chunks. Documents only.

python
async def search(
query: str,
*,
limit: int = 20, # 1-100
similarity_threshold: float = 0.5,
document_ids: Optional[list[str]] = None,
) -> DocumentSearchResponse

analyze()

Blocking document analysis: uploads the file and awaits the full result for up to timeout seconds (unlike analyze_async(), which returns immediately). 10 MB limit — use upload() for larger files.

python
async def analyze(
file: Union[str, Path, bytes],
*,
filename: Optional[str] = None,
skip_duplicates: bool = False,
timeout: int = 60, # 5-120 seconds
folder_id: Optional[str] = None,
project_id: Optional[str] = None,
) -> dict[str, Any]

analyze_async()

Fire-and-forget document analysis — returns a job_id immediately instead of waiting for the result. 10 MB limit.

python
async def analyze_async(
file: Union[str, Path, bytes],
*,
filename: Optional[str] = None,
skip_duplicates: bool = False,
folder_id: Optional[str] = None,
project_id: Optional[str] = None,
) -> dict[str, Any]

get_export_columns()

List the columns available for file-list exports, grouped by category.

python
async def get_export_columns() -> ExportableColumnsResponse

export()

Export filtered file metadata as CSV, XLSX, or Google Sheets. Columns can be either ExportColumnSpec instances or plain dicts.

python
async def export(
*,
format: str, # "csv" | "xlsx" | "google_sheets"
columns: list[Any], # ExportColumnSpec | dict
file_ids: Optional[list[str]] = None,
folder_id: Optional[str] = None,
include_subfolders: bool = False,
flatten_tags: bool = True,
sheet_name: str = "Files", # max 31 chars
google_sheets_title: Optional[str] = None, # max 200 chars
connection_id: Optional[str] = None, # required when format="google_sheets"
) -> FileExportResult

Batch session tracking

get_session_status()

Progress + status for a batch upload session.

python
async def get_session_status(session_id: str) -> UploadSessionStatus

get_session_results()

Paginated per-file results from a batch session.

python
async def get_session_results(
session_id: str,
*,
limit: int = 50, # 1-100
offset: int = 0,
) -> UploadSessionResults

cancel_session()

Cancel an in-progress batch upload session.

python
async def cancel_session(session_id: str) -> dict[str, Any]

wait_for_session()

Poll a session until it reaches a terminal state (completed, failed, cancelled, expired) or timeout elapses.

python
async def wait_for_session(
session_id: str,
*,
timeout: Optional[float] = None, # defaults to config.polling_timeout
poll_interval: Optional[float] = None, # defaults to config.polling_interval
on_progress: Optional[Callable[[UploadSessionStatus], None]] = None,
) -> UploadSessionStatus

list_stuck_uploads()

Operator-visibility endpoint — list uploads that have been stuck beyond stuck_minutes.

python
async def list_stuck_uploads(
*,
stuck_minutes: int = 30, # >= 1
limit: int = 100, # 1-500
) -> dict[str, Any]

Health

health()

Health probe for the unified file service.

python
async def health() -> dict[str, Any]