Skip to content

FileIO properties missing TOKEN after AuthManager refactor #2544

@martyngigg

Description

@martyngigg

Apache Iceberg version

0.10.0 (latest release)

Please describe the bug 🐞

In v0.9.1 the following script successfully wrote to a local, docker-compose-based Lakekeeper REST catalog with MinIO S3 storage:

from pyiceberg.catalog.rest import RestCatalog
import pyarrow as pa

catalog_name = "default"
catalog_properties = {
    "uri": "http://localhost:58080/iceberg/catalog",
    "warehouse": "playground",
    "credential": "<CLIENT_ID>:<CLIENT_SECRET>",
    "oauth2-server-uri": "http://localhost:58080/auth/realms/iceberg/protocol/openid-connect/token",
    "scope": "lakekeeper"
}
catalog = RestCatalog(catalog_name, **catalog_properties)
df = pa.Table.from_pylist(
    [
        {"city": "Amsterdam", "lat": 52.371807, "long": 4.896029},
        {"city": "San Francisco", "lat": 37.773972, "long": -122.431297},
        {"city": "Drachten", "lat": 53.11254, "long": 6.0989},
        {"city": "Paris", "lat": 48.864716, "long": 2.349014},
    ],
)
namespace, table_name = "default", "cities"
catalog.create_namespace_if_not_exists(namespace)
tbl = catalog.create_table_if_not_exists(f"{namespace}.{table_name}", schema=df.schema)
tbl.append(df)
print(f"FileIO token={tbl.io.properties['token']}")

When the FileIO was created by:

return load_file_io({**self.properties, **properties}, location)

the self.properties of the Catalog already contained a token that was created upon initializing the catalog session.

After the AuthManager refactor, a token no longer appears in the Catalog's self.properties passed to FileIO.__init__, which causes the following exception on the tbl.append(df) line:

  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/io/fsspec.py", line 118, in s3v4_rest_signer
    raise SignError(f"Failed to sign request {response.status_code}: {signer_body}") from e
pyiceberg.exceptions.SignError: Failed to sign request 401: {'method': 'PUT', 'region': 'local-01', 'uri': 'http://minio:59000/playground/019994c5-5cba-7341-9a6b-ae48b68fc36b/019994c5-f647-7791-a57c-1fb7b493925b/metadata/snap-1472413044088081430-0-f3944734-cc0a-4006-8802-e340f5b13aa9.avro', 'headers': {'User-Agent': ['aiobotocore/2.24.2 md/Botocore#1.40.18 ua/2.1 os/macos#24.6.0 md/arch#x86_64 lang/python#3.13.3 md/pyimpl#CPython m/b,a,N,D cfg/retry-mode#legacy botocore/1.40.18'], 'Expect': ['100-continue']}}
Full traceback
Traceback (most recent call last):
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/io/fsspec.py", line 115, in s3v4_rest_signer
    response.raise_for_status()
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/requests/models.py", line 1026, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: http://localhost:58080/iceberg/catalog/v1/signer/56d90850-9d15-11f0-b1ba-ef4b76728eaf/tabular-id/019994c5-f647-7791-a57c-1fb7b493925b/v1/aws/s3/sign

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/Users/dmn58364/Code/adp-scripts/test_catalog_access.py", line 91, in <module>
    tbl.append(df)
    ~~~~~~~~~~^^^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/table/__init__.py", line 1362, in append
    tx.append(df=df, snapshot_properties=snapshot_properties, branch=branch)
    ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/table/__init__.py", line 482, in append
    with self._append_snapshot_producer(snapshot_properties, branch=branch) as append_files:
         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/table/update/__init__.py", line 76, in __exit__
    self.commit()
    ~~~~~~~~~~~^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/table/update/__init__.py", line 72, in commit
    self._transaction._apply(*self._commit())
                              ~~~~~~~~~~~~^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/table/update/snapshot.py", line 277, in _commit
    with write_manifest_list(
         ~~~~~~~~~~~~~~~~~~~^
        format_version=self._transaction.table_metadata.format_version,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<4 lines>...
        avro_compression=self._compression,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    ) as writer:
    ^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/manifest.py", line 1231, in __exit__
    self._writer.__exit__(exc_type, exc_value, traceback)
    ~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/avro/file.py", line 277, in __exit__
    self.output_stream.close()
    ~~~~~~~~~~~~~~~~~~~~~~~~^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/fsspec/spec.py", line 2206, in close
    self.flush(force=True)
    ~~~~~~~~~~^^^^^^^^^^^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/fsspec/spec.py", line 2069, in flush
    if self._upload_chunk(final=force) is not False:
       ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/s3fs/core.py", line 2449, in _upload_chunk
    self.commit()
    ~~~~~~~~~~~^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/s3fs/core.py", line 2475, in commit
    write_result = self._call_s3("put_object", **kw, **match)
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/s3fs/core.py", line 2309, in _call_s3
    return self.fs.call_s3(method, self.s3_additional_kwargs, *kwarglist, **kwargs)
           ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/fsspec/asyn.py", line 118, in wrapper
    return sync(self.loop, func, *args, **kwargs)
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/fsspec/asyn.py", line 103, in sync
    raise return_result
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/fsspec/asyn.py", line 56, in _runner
    result[0] = await coro
                ^^^^^^^^^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/s3fs/core.py", line 371, in _call_s3
    return await _error_wrapper(
           ^^^^^^^^^^^^^^^^^^^^^
        method, kwargs=additional_kwargs, retries=self.retries
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/s3fs/core.py", line 146, in _error_wrapper
    raise err
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/s3fs/core.py", line 114, in _error_wrapper
    return await func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/context.py", line 36, in wrapper
    return await func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/client.py", line 406, in _make_api_call
    http, parsed_response = await self._make_request(
                            ^^^^^^^^^^^^^^^^^^^^^^^^^
        operation_model, request_dict, request_context
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/client.py", line 432, in _make_request
    return await self._endpoint.make_request(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        operation_model, request_dict
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/endpoint.py", line 116, in _send_request
    request = await self.create_request(request_dict, operation_model)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/endpoint.py", line 104, in create_request
    await self._event_emitter.emit(
    ...<3 lines>...
    )
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/hooks.py", line 68, in _emit
    response = await resolve_awaitable(handler(**kwargs))
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/_helpers.py", line 6, in resolve_awaitable
    return await obj
           ^^^^^^^^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/signers.py", line 26, in handler
    return await self.sign(operation_name, request)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/signers.py", line 49, in sign
    await self._event_emitter.emit(
    ...<7 lines>...
    )
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/aiobotocore/hooks.py", line 68, in _emit
    response = await resolve_awaitable(handler(**kwargs))
                                       ~~~~~~~^^^^^^^^^^
  File "/Users/dmn58364/Code/adp-scripts/.venv/lib/python3.13/site-packages/pyiceberg/io/fsspec.py", line 118, in s3v4_rest_signer
    raise SignError(f"Failed to sign request {response.status_code}: {signer_body}") from e
pyiceberg.exceptions.SignError: Failed to sign request 401: {'method': 'PUT', 'region': 'local-01', 'uri': 'http://minio:59000/playground/019994c5-5cba-7341-9a6b-ae48b68fc36b/019994c5-f647-7791-a57c-1fb7b493925b/metadata/snap-1472413044088081430-0-f3944734-cc0a-4006-8802-e340f5b13aa9.avro', 'headers': {'User-Agent': ['aiobotocore/2.24.2 md/Botocore#1.40.18 ua/2.1 os/macos#24.6.0 md/arch#x86_64 lang/python#3.13.3 md/pyimpl#CPython m/b,a,N,D cfg/retry-mode#legacy botocore/1.40.18'], 'Expect': ['100-continue']}}

I can work around this by adding a token to the **properties passed to RestCatalog.__init__, but I don't think this token would then get refreshed.

Willingness to contribute

  • I can contribute a fix for this bug independently
  • I would be willing to contribute a fix for this bug with guidance from the Iceberg community
  • I cannot contribute a fix for this bug at this time

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions