Skip to content

Commit 4407524

Browse files
authored
feat: add pathlike object support for storage operations (#39)
- Updated storage mixin method signatures to accept Union[str, Path]
- Updated storage backend protocol signatures for pathlike objects
- Modified BigQuery driver to use datetime.now(timezone.utc) instead of deprecated utcnow()
- Implemented BigQuery optimized import/export with automatic GCS staging for non-GCS paths
- Replaced direct file operations with storage backend calls in:
  - SQLite driver's _bulk_load_file method
  - AIOSQLite driver's _bulk_load_file method
  - SQLFileLoader's _read_file_content method
- Added comprehensive tests for pathlike object support in:
  - Storage mixins integration tests
  - FSSpec backend unit tests
1 parent 14126ba commit 4407524

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

51 files changed

+3021
-1183
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ repos:
1717
- id: mixed-line-ending
1818
- id: trailing-whitespace
1919
- repo: https://github.com/charliermarsh/ruff-pre-commit
20-
rev: "v0.12.0"
20+
rev: "v0.12.1"
2121
hooks:
2222
- id: ruff
2323
args: ["--fix"]

docs/examples/litestar_duckllm.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# type: ignore
12
"""Litestar DuckLLM
23
34
This example demonstrates how to use the Litestar framework with the DuckLLM extension.

docs/examples/litestar_multi_db.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# type: ignore
12
"""Litestar Multi DB
23
34
This example demonstrates how to use multiple databases in a Litestar application.

docs/examples/service_example.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# type: ignore
12
"""Example demonstrating the high-level service layer.
23
34
This example shows how to use the DatabaseService and AsyncDatabaseService

docs/examples/standalone_demo.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#!/usr/bin/env python3
2+
# type: ignore
23
# /// script
34
# dependencies = [
45
# "sqlspec[duckdb,performance]",

sqlspec/adapters/aiosqlite/driver.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -203,8 +203,7 @@ async def _execute_script(
203203
return result
204204

205205
async def _bulk_load_file(self, file_path: Path, table_name: str, format: str, mode: str, **options: Any) -> int:
206-
"""Database-specific bulk load implementation."""
207-
# TODO: convert this to use the storage backend. it has async support
206+
"""Database-specific bulk load implementation using storage backend."""
208207
if format != "csv":
209208
msg = f"aiosqlite driver only supports CSV for bulk loading, not {format}."
210209
raise NotImplementedError(msg)
@@ -215,15 +214,21 @@ async def _bulk_load_file(self, file_path: Path, table_name: str, format: str, m
215214
if mode == "replace":
216215
await cursor.execute(f"DELETE FROM {table_name}")
217216

218-
# Using sync file IO here as it's a fallback path and aiofiles is not a dependency
219-
with Path(file_path).open(encoding="utf-8") as f: # noqa: ASYNC230
220-
reader = csv.reader(f, **options)
221-
header = next(reader) # Skip header
222-
placeholders = ", ".join("?" for _ in header)
223-
sql = f"INSERT INTO {table_name} VALUES ({placeholders})"
224-
data_iter = list(reader)
225-
await cursor.executemany(sql, data_iter)
226-
rowcount = cursor.rowcount
217+
# Use async storage backend to read the file
218+
file_path_str = str(file_path)
219+
backend = self._get_storage_backend(file_path_str)
220+
content = await backend.read_text_async(file_path_str, encoding="utf-8")
221+
# Parse CSV content
222+
import io
223+
224+
csv_file = io.StringIO(content)
225+
reader = csv.reader(csv_file, **options)
226+
header = next(reader) # Skip header
227+
placeholders = ", ".join("?" for _ in header)
228+
sql = f"INSERT INTO {table_name} VALUES ({placeholders})"
229+
data_iter = list(reader)
230+
await cursor.executemany(sql, data_iter)
231+
rowcount = cursor.rowcount
227232
await conn.commit()
228233
return rowcount
229234
finally:

sqlspec/adapters/bigquery/driver.py

Lines changed: 113 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
1+
import contextlib
12
import datetime
23
import io
34
import logging
5+
import uuid
46
from collections.abc import Iterator
57
from decimal import Decimal
68
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, Union, cast
79

810
from google.cloud.bigquery import (
911
ArrayQueryParameter,
1012
Client,
13+
ExtractJobConfig,
1114
LoadJobConfig,
1215
QueryJob,
1316
QueryJobConfig,
1417
ScalarQueryParameter,
18+
SourceFormat,
1519
WriteDisposition,
1620
)
1721
from google.cloud.bigquery.table import Row as BigQueryRow
@@ -32,6 +36,8 @@
3236
from sqlspec.utils.serializers import to_json
3337

3438
if TYPE_CHECKING:
39+
from pathlib import Path
40+
3541
from sqlglot.dialects.dialect import DialectType
3642

3743

@@ -258,23 +264,17 @@ def _run_query_job(
258264
param_value,
259265
type(param_value),
260266
)
261-
# Let BigQuery generate the job ID to avoid collisions
262-
# This is the recommended approach for production code and works better with emulators
263-
logger.warning("About to send to BigQuery - SQL: %r", sql_str)
264-
logger.warning("Query parameters in job config: %r", final_job_config.query_parameters)
265267
query_job = conn.query(sql_str, job_config=final_job_config)
266268

267269
# Get the auto-generated job ID for callbacks
268270
if self.on_job_start and query_job.job_id:
269-
try:
271+
with contextlib.suppress(Exception):
272+
# Callback errors should not interfere with job execution
270273
self.on_job_start(query_job.job_id)
271-
except Exception as e:
272-
logger.warning("Job start callback failed: %s", str(e), extra={"adapter": "bigquery"})
273274
if self.on_job_complete and query_job.job_id:
274-
try:
275+
with contextlib.suppress(Exception):
276+
# Callback errors should not interfere with job execution
275277
self.on_job_complete(query_job.job_id, query_job)
276-
except Exception as e:
277-
logger.warning("Job complete callback failed: %s", str(e), extra={"adapter": "bigquery"})
278278

279279
return query_job
280280

@@ -529,28 +529,120 @@ def _connection(self, connection: "Optional[Client]" = None) -> "Client":
529529
# BigQuery Native Export Support
530530
# ============================================================================
531531

532-
def _export_native(self, query: str, destination_uri: str, format: str, **options: Any) -> int:
533-
"""BigQuery native export implementation.
532+
def _export_native(self, query: str, destination_uri: "Union[str, Path]", format: str, **options: Any) -> int:
533+
"""BigQuery native export implementation with automatic GCS staging.
534534
535-
For local files, BigQuery doesn't support direct export, so we raise NotImplementedError
536-
to trigger the fallback mechanism that uses fetch + write.
535+
For GCS URIs, uses direct export. For other locations, automatically stages
536+
through a temporary GCS location and transfers to the final destination.
537537
538538
Args:
539539
query: SQL query to execute
540-
destination_uri: Destination URI (local file path or gs:// URI)
540+
destination_uri: Destination URI (local file path, gs:// URI, or Path object)
541541
format: Export format (parquet, csv, json, avro)
542-
**options: Additional export options
542+
**options: Additional export options including 'gcs_staging_bucket'
543543
544544
Returns:
545545
Number of rows exported
546546
547547
Raises:
548-
NotImplementedError: Always, to trigger fallback to fetch + write
548+
NotImplementedError: If no staging bucket is configured for non-GCS destinations
549549
"""
550-
# BigQuery only supports native export to GCS, not local files
551-
# By raising NotImplementedError, the mixin will fall back to fetch + write
552-
msg = "BigQuery native export only supports GCS URIs, using fallback for local files"
553-
raise NotImplementedError(msg)
550+
destination_str = str(destination_uri)
551+
552+
# If it's already a GCS URI, use direct export
553+
if destination_str.startswith("gs://"):
554+
return self._export_to_gcs_native(query, destination_str, format, **options)
555+
556+
# For non-GCS destinations, check if staging is configured
557+
staging_bucket = options.get("gcs_staging_bucket") or getattr(self.config, "gcs_staging_bucket", None)
558+
if not staging_bucket:
559+
# Fall back to fetch + write for non-GCS destinations without staging
560+
msg = "BigQuery native export requires GCS staging bucket for non-GCS destinations"
561+
raise NotImplementedError(msg)
562+
563+
# Generate temporary GCS path
564+
from datetime import timezone
565+
566+
timestamp = datetime.datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
567+
temp_filename = f"bigquery_export_{timestamp}_{uuid.uuid4().hex[:8]}.{format}"
568+
temp_gcs_uri = f"gs://{staging_bucket}/temp_exports/{temp_filename}"
569+
570+
try:
571+
# Export to temporary GCS location
572+
rows_exported = self._export_to_gcs_native(query, temp_gcs_uri, format, **options)
573+
574+
# Transfer from GCS to final destination using storage backend
575+
backend, path = self._resolve_backend_and_path(destination_str)
576+
gcs_backend = self._get_storage_backend(temp_gcs_uri)
577+
578+
# Download from GCS and upload to final destination
579+
data = gcs_backend.read_bytes(temp_gcs_uri)
580+
backend.write_bytes(path, data)
581+
582+
return rows_exported
583+
finally:
584+
# Clean up temporary file
585+
try:
586+
gcs_backend = self._get_storage_backend(temp_gcs_uri)
587+
gcs_backend.delete(temp_gcs_uri)
588+
except Exception as e:
589+
logger.warning("Failed to clean up temporary GCS file %s: %s", temp_gcs_uri, e)
590+
591+
def _export_to_gcs_native(self, query: str, gcs_uri: str, format: str, **options: Any) -> int:
592+
"""Direct BigQuery export to GCS.
593+
594+
Args:
595+
query: SQL query to execute
596+
gcs_uri: GCS destination URI (must start with gs://)
597+
format: Export format (parquet, csv, json, avro)
598+
**options: Additional export options
599+
600+
Returns:
601+
Number of rows exported
602+
"""
603+
# First, run the query and store results in a temporary table
604+
605+
temp_table_id = f"temp_export_{uuid.uuid4().hex[:8]}"
606+
dataset_id = getattr(self.connection, "default_dataset", None) or options.get("dataset", "temp")
607+
608+
# Create a temporary table with query results
609+
query_with_table = f"CREATE OR REPLACE TABLE `{dataset_id}.{temp_table_id}` AS {query}"
610+
create_job = self._run_query_job(query_with_table, [])
611+
create_job.result()
612+
613+
# Get row count
614+
count_query = f"SELECT COUNT(*) as cnt FROM `{dataset_id}.{temp_table_id}`"
615+
count_job = self._run_query_job(count_query, [])
616+
count_result = list(count_job.result())
617+
row_count = count_result[0]["cnt"] if count_result else 0
618+
619+
try:
620+
# Configure extract job
621+
extract_config = ExtractJobConfig(**options) # type: ignore[no-untyped-call]
622+
623+
# Set format
624+
format_mapping = {
625+
"parquet": SourceFormat.PARQUET,
626+
"csv": SourceFormat.CSV,
627+
"json": SourceFormat.NEWLINE_DELIMITED_JSON,
628+
"avro": SourceFormat.AVRO,
629+
}
630+
extract_config.destination_format = format_mapping.get(format, SourceFormat.PARQUET)
631+
632+
# Extract table to GCS
633+
table_ref = self.connection.dataset(dataset_id).table(temp_table_id)
634+
extract_job = self.connection.extract_table(table_ref, gcs_uri, job_config=extract_config)
635+
extract_job.result()
636+
637+
return row_count
638+
finally:
639+
# Clean up temporary table
640+
try:
641+
delete_query = f"DROP TABLE IF EXISTS `{dataset_id}.{temp_table_id}`"
642+
delete_job = self._run_query_job(delete_query, [])
643+
delete_job.result()
644+
except Exception as e:
645+
logger.warning("Failed to clean up temporary table %s: %s", temp_table_id, e)
554646

555647
# ============================================================================
556648
# BigQuery Native Arrow Support

sqlspec/adapters/duckdb/driver.py

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import uuid
33
from collections.abc import Generator
44
from contextlib import contextmanager
5+
from pathlib import Path
56
from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union, cast
67

78
from duckdb import DuckDBPyConnection
@@ -251,7 +252,7 @@ def _has_native_capability(self, operation: str, uri: str = "", format: str = ""
251252
return True
252253
return False
253254

254-
def _export_native(self, query: str, destination_uri: str, format: str, **options: Any) -> int:
255+
def _export_native(self, query: str, destination_uri: Union[str, Path], format: str, **options: Any) -> int:
255256
conn = self._connection(None)
256257
copy_options: list[str] = []
257258

@@ -283,19 +284,21 @@ def _export_native(self, query: str, destination_uri: str, format: str, **option
283284
raise ValueError(msg)
284285

285286
options_str = f"({', '.join(copy_options)})" if copy_options else ""
286-
copy_sql = f"COPY ({query}) TO '{destination_uri}' {options_str}"
287+
copy_sql = f"COPY ({query}) TO '{destination_uri!s}' {options_str}"
287288
result_rel = conn.execute(copy_sql)
288289
result = result_rel.fetchone() if result_rel else None
289290
return result[0] if result else 0
290291

291-
def _import_native(self, source_uri: str, table_name: str, format: str, mode: str, **options: Any) -> int:
292+
def _import_native(
293+
self, source_uri: Union[str, Path], table_name: str, format: str, mode: str, **options: Any
294+
) -> int:
292295
conn = self._connection(None)
293296
if format == "parquet":
294-
read_func = f"read_parquet('{source_uri}')"
297+
read_func = f"read_parquet('{source_uri!s}')"
295298
elif format == "csv":
296-
read_func = f"read_csv_auto('{source_uri}')"
299+
read_func = f"read_csv_auto('{source_uri!s}')"
297300
elif format == "json":
298-
read_func = f"read_json_auto('{source_uri}')"
301+
read_func = f"read_json_auto('{source_uri!s}')"
299302
else:
300303
msg = f"Unsupported format for DuckDB native import: {format}"
301304
raise ValueError(msg)
@@ -320,16 +323,16 @@ def _import_native(self, source_uri: str, table_name: str, format: str, mode: st
320323
return int(count_result[0]) if count_result else 0
321324

322325
def _read_parquet_native(
323-
self, source_uri: str, columns: Optional[list[str]] = None, **options: Any
326+
self, source_uri: Union[str, Path], columns: Optional[list[str]] = None, **options: Any
324327
) -> "SQLResult[dict[str, Any]]":
325328
conn = self._connection(None)
326329
if isinstance(source_uri, list):
327330
file_list = "[" + ", ".join(f"'{f}'" for f in source_uri) + "]"
328331
read_func = f"read_parquet({file_list})"
329-
elif "*" in source_uri or "?" in source_uri:
330-
read_func = f"read_parquet('{source_uri}')"
332+
elif "*" in str(source_uri) or "?" in str(source_uri):
333+
read_func = f"read_parquet('{source_uri!s}')"
331334
else:
332-
read_func = f"read_parquet('{source_uri}')"
335+
read_func = f"read_parquet('{source_uri!s}')"
333336

334337
column_list = ", ".join(columns) if columns else "*"
335338
query = f"SELECT {column_list} FROM {read_func}"
@@ -353,7 +356,9 @@ def _read_parquet_native(
353356
statement=SQL(query), data=rows, column_names=column_names, rows_affected=num_rows, operation_type="SELECT"
354357
)
355358

356-
def _write_parquet_native(self, data: Union[str, "ArrowTable"], destination_uri: str, **options: Any) -> None:
359+
def _write_parquet_native(
360+
self, data: Union[str, "ArrowTable"], destination_uri: Union[str, Path], **options: Any
361+
) -> None:
357362
conn = self._connection(None)
358363
copy_options: list[str] = ["FORMAT PARQUET"]
359364
if "compression" in options:
@@ -364,13 +369,13 @@ def _write_parquet_native(self, data: Union[str, "ArrowTable"], destination_uri:
364369
options_str = f"({', '.join(copy_options)})"
365370

366371
if isinstance(data, str):
367-
copy_sql = f"COPY ({data}) TO '{destination_uri}' {options_str}"
372+
copy_sql = f"COPY ({data}) TO '{destination_uri!s}' {options_str}"
368373
conn.execute(copy_sql)
369374
else:
370375
temp_name = f"_arrow_data_{uuid.uuid4().hex[:8]}"
371376
conn.register(temp_name, data)
372377
try:
373-
copy_sql = f"COPY {temp_name} TO '{destination_uri}' {options_str}"
378+
copy_sql = f"COPY {temp_name} TO '{destination_uri!s}' {options_str}"
374379
conn.execute(copy_sql)
375380
finally:
376381
with contextlib.suppress(Exception):

0 commit comments

Comments
 (0)