# tifffile/zarr.py
# Copyright (c) 2008-2025, Christoph Gohlke
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Zarr 3 TIFF and file sequence stores."""
from __future__ import annotations
__all__ = ['ZarrStore', 'ZarrTiffStore', 'ZarrFileSequenceStore']
import asyncio
import json
import os
import sys
import threading
from typing import TYPE_CHECKING
import numpy
import zarr
try:
from zarr.abc.store import ByteRequest, Store
from zarr.core.buffer.cpu import NDBuffer
from zarr.core.chunk_grids import RegularChunkGrid
except ImportError as exc:
raise ValueError(f'zarr {zarr.__version__} < 3 is not supported') from exc
from .tifffile import (
CHUNKMODE,
COMPRESSION,
FileCache,
FileSequence,
NullContext,
TiffFrame,
TiffPage,
TiffPageSeries,
TiledSequence,
create_output,
enumarg,
imread,
jpeg_decode_colorspace,
product,
)
if TYPE_CHECKING:
from collections.abc import (
AsyncIterator,
Callable,
Iterable,
Iterator,
Sequence,
)
from typing import Any, TextIO
from numpy.typing import DTypeLike, NDArray
from zarr.core.buffer import Buffer, BufferPrototype
from zarr.core.indexing import BasicSelection
from .tifffile import ByteOrder, OutputType
class ZarrStore(Store):
"""Zarr 3 store base class.
Parameters:
fillvalue:
Value to use for missing chunks of Zarr store.
The default is 0.
chunkmode:
Specifies how to chunk data.
read_only:
Passed to :py:class:`zarr.abc.store.Store`.
References:
1. https://zarr.readthedocs.io/en/stable/api/zarr/abc/store/
2. https://zarr.readthedocs.io/en/stable/spec/v2.html
3. https://forum.image.sc/t/multiscale-arrays-v0-1/37930
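Examples:
    Stores are typically created with the ``aszarr`` methods of
    tifffile objects and read with ``zarr.open``. A minimal sketch,
    assuming 'temp.tif' is an existing, non-pyramidal TIFF file
    (the file name is illustrative)::

        import zarr
        import tifffile

        store = tifffile.imread('temp.tif', aszarr=True)
        za = zarr.open(store, mode='r', zarr_format=2)
        image = za[:]
        store.close()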
"""
_read_only: bool
_store: dict[str, Any]
_fillvalue: int | float
_chunkmode: int
def __init__(
self,
/,
*,
fillvalue: int | float | None = None,
chunkmode: CHUNKMODE | int | str | None = None,
read_only: bool = True,
) -> None:
super().__init__(read_only=read_only)
self._store = {}
self._fillvalue = 0 if fillvalue is None else fillvalue
if chunkmode is None:
self._chunkmode = CHUNKMODE(0)
else:
self._chunkmode = enumarg(CHUNKMODE, chunkmode)
def __eq__(self, other: object) -> bool:
"""Return whether objects are equal."""
return (
isinstance(other, type(self))
and self._store == other._store
and self._fillvalue == other._fillvalue
and self._chunkmode == other._chunkmode
)
async def get_partial_values(
self,
prototype: BufferPrototype,
key_ranges: Iterable[tuple[str, ByteRequest | None]],
) -> list[Buffer | None]:
"""Return possibly partial values from given key_ranges."""
# print(f'get_partial_values({key_ranges=})')
return [
await self.get(key, prototype, byte_range)
for key, byte_range in key_ranges
]
@property
def supports_writes(self) -> bool:
"""Store supports writes."""
return not self._read_only
def _set(self, key: str, value: Buffer, /) -> None:
"""Store (key, value) pair."""
raise NotImplementedError
async def set(self, key: str, value: Buffer) -> None:
"""Store (key, value) pair."""
self._set(key, value)
@property
def supports_deletes(self) -> bool:
"""Store supports deletes."""
return False
async def delete(self, key: str) -> None:
"""Remove key from store."""
raise PermissionError('ZarrStore does not support deletes')
@property
def supports_listing(self) -> bool:
"""Store supports listing."""
return True
async def list(self) -> AsyncIterator[str]:
"""Return all keys in store."""
for key in self._store:
yield key
async def list_prefix(self, prefix: str) -> AsyncIterator[str]:
"""Return all keys in store that begin with prefix.
Keys are returned relative to the root of the store.
"""
for key in list(self._store):
if key.startswith(prefix):
yield key
async def list_dir(self, prefix: str) -> AsyncIterator[str]:
"""Return all keys and prefixes with prefix.
Keys and prefixes do not contain the character "/" after the given
prefix.
"""
prefix = prefix.rstrip('/')
if prefix == '':
keys_unique = {k.split('/')[0] for k in self._store}
else:
keys_unique = {
key.removeprefix(prefix + '/').split('/')[0]
for key in self._store
if key.startswith(prefix + '/') and key != prefix
}
for key in keys_unique:
yield key
@property
def is_multiscales(self) -> bool:
"""Return whether ZarrStore contains multiscales."""
return b'multiscales' in self._store['.zattrs']
def __repr__(self) -> str:
return f'{self.__class__.__name__}'
# async def _get_many(
# self,
# requests: Iterable[tuple[str, BufferPrototype, ByteRequest | None]]
# ) -> AsyncGenerator[tuple[str, Buffer | None], None]:
# print(f'_get_many({requests=})')
# return super()._get_many(requests)
# async def getsize_prefix(self, prefix: str) -> int:
# print(f'getsize_prefix({prefix=})')
# return super().getsize_prefix(prefix)
class ZarrTiffStore(ZarrStore):
"""Zarr 3 store interface to image array in TiffPage or TiffPageSeries.
ZarrTiffStore uses a TiffFile instance to read and decode chunks.
Therefore, ZarrTiffStore instances cannot be pickled.
For writing, image data must be stored in uncompressed, unpredicted,
and unpacked form. Sparse strips and tiles are not written.
Parameters:
arg:
TIFF page or series to wrap as Zarr store.
level:
Pyramidal level to wrap. The default is 0.
chunkmode:
Use strips or tiles (0) or whole page data (2) as chunks.
The default is 0.
fillvalue:
Value to use for missing chunks. The default is 0.
zattrs:
Additional attributes to store in `.zattrs`.
multiscales:
Create a multiscales-compatible Zarr group store.
By default, create a Zarr array store for pages and non-pyramidal
series.
lock:
Reentrant lock to synchronize seeks and reads from file.
By default, the lock of the parent's file handle is used.
squeeze:
Remove length-1 dimensions from shape of TiffPageSeries.
maxworkers:
If `chunkmode=0` and greater than 1, run the chunk decode function
asynchronously in a separate thread.
If `chunkmode=2`, maximum number of threads used to concurrently
decode strips or tiles.
If *None* or *0*, use up to :py:attr:`_TIFF.MAXWORKERS` or
asyncio-assigned threads.
buffersize:
Approximate number of bytes to read from file in one pass
if `chunkmode=2`. The default is :py:attr:`_TIFF.BUFFERSIZE`.
read_only:
Passed to :py:class:`zarr.abc.store.Store`.
_openfiles:
Internal API.
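Examples:
    A minimal usage sketch, assuming 'temp.tif' is an existing,
    non-pyramidal TIFF file (the file name is illustrative)::

        import zarr
        import tifffile

        with tifffile.TiffFile('temp.tif') as tif:
            store = ZarrTiffStore(tif.series[0])
            za = zarr.open(store, mode='r', zarr_format=2)
            image = za[:]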
"""
_data: list[TiffPageSeries]
_filecache: FileCache
_transform: Callable[[NDArray[Any]], NDArray[Any]] | None
_maxworkers: int
_buffersize: int | None
_squeeze: bool | None
_multiscales: bool
def __init__(
self,
arg: TiffPage | TiffFrame | TiffPageSeries,
/,
*,
level: int | None = None,
chunkmode: CHUNKMODE | int | str | None = None,
fillvalue: int | float | None = None,
zattrs: dict[str, Any] | None = None,
multiscales: bool | None = None,
lock: threading.RLock | NullContext | None = None,
squeeze: bool | None = None,
maxworkers: int | None = None,
buffersize: int | None = None,
read_only: bool | None = None,
_openfiles: int | None = None,
**kwargs: Any,
) -> None:
if chunkmode is None:
self._chunkmode = CHUNKMODE(0)
else:
self._chunkmode = enumarg(CHUNKMODE, chunkmode)
if self._chunkmode not in {0, 2}:
raise NotImplementedError(f'{self._chunkmode!r} not implemented')
self._squeeze = None if squeeze is None else bool(squeeze)
self._buffersize = buffersize
if isinstance(arg, TiffPageSeries):
self._data = arg.levels
self._transform = arg.transform
if multiscales is not None and not multiscales:
level = 0
if level is not None:
self._data = [self._data[level]]
name = arg.name
else:
self._data = [TiffPageSeries([arg])]
self._transform = None
name = 'Unnamed'
if not maxworkers:
maxworkers = self._data[0].keyframe.maxworkers
if maxworkers < 3 and self._chunkmode == 0:
maxworkers = 1
self._maxworkers = maxworkers
fh = self._data[0].keyframe.parent._parent.filehandle
if read_only is None:
read_only = not fh.writable() or self._chunkmode != 0
super().__init__(
fillvalue=fillvalue, chunkmode=self._chunkmode, read_only=read_only
)
if lock is None:
fh.set_lock(True)
lock = fh.lock
self._filecache = FileCache(size=_openfiles, lock=lock)
zattrs = {} if zattrs is None else dict(zattrs)
# TODO: Zarr Encoding Specification
# https://xarray.pydata.org/en/stable/internals/zarr-encoding-spec.html
if multiscales or len(self._data) > 1:
# multiscales
self._multiscales = True
if '_ARRAY_DIMENSIONS' in zattrs:
array_dimensions = zattrs.pop('_ARRAY_DIMENSIONS')
else:
array_dimensions = list(self._data[0].get_axes(squeeze))
self._store['.zgroup'] = _json_dumps({'zarr_format': 2})
self._store['.zattrs'] = _json_dumps(
{
# TODO: use https://ngff.openmicroscopy.org/latest/
'multiscales': [
{
'version': '0.1',
'name': name,
'datasets': [
{'path': str(i)}
for i in range(len(self._data))
],
# 'axes': [...]
# 'type': 'unknown',
'metadata': {},
}
],
**zattrs,
}
)
shape0 = self._data[0].get_shape(squeeze)
for level, series in enumerate(self._data):
keyframe = series.keyframe
keyframe.decode # cache decode function
shape = series.get_shape(squeeze)
dtype = series.dtype
if fillvalue is None:
self._fillvalue = fillvalue = keyframe.nodata
if self._chunkmode:
chunks = keyframe.shape
else:
chunks = keyframe.chunks
self._store[f'{level}/.zattrs'] = _json_dumps(
{
'_ARRAY_DIMENSIONS': [
(f'{ax}{level}' if i != j else ax)
for ax, i, j in zip(
array_dimensions, shape, shape0
)
]
}
)
self._store[f'{level}/.zarray'] = _json_dumps(
{
'zarr_format': 2,
'shape': shape,
'chunks': _chunks(chunks, shape, keyframe.shaped),
'dtype': _dtype_str(dtype),
'compressor': None,
'fill_value': _json_value(fillvalue, dtype),
'order': 'C',
'filters': None,
}
)
if not self._read_only:
self._read_only = not _is_writable(keyframe)
else:
self._multiscales = False
series = self._data[0]
keyframe = series.keyframe
keyframe.decode # cache decode function
shape = series.get_shape(squeeze)
dtype = series.dtype
if fillvalue is None:
self._fillvalue = fillvalue = keyframe.nodata
if self._chunkmode:
chunks = keyframe.shape
else:
chunks = keyframe.chunks
if '_ARRAY_DIMENSIONS' not in zattrs:
zattrs['_ARRAY_DIMENSIONS'] = list(series.get_axes(squeeze))
self._store['.zattrs'] = _json_dumps(zattrs)
self._store['.zarray'] = _json_dumps(
{
'zarr_format': 2,
'shape': shape,
'chunks': _chunks(chunks, shape, keyframe.shaped),
'dtype': _dtype_str(dtype),
'compressor': None,
'fill_value': _json_value(fillvalue, dtype),
'order': 'C',
'filters': None,
}
)
if not self._read_only:
self._read_only = not _is_writable(keyframe)
def close(self) -> None:
"""Close store."""
super().close()
self._filecache.clear()
def write_fsspec(
self,
jsonfile: str | os.PathLike[Any] | TextIO,
/,
url: str | None,
*,
groupname: str | None = None,
templatename: str | None = None,
compressors: dict[COMPRESSION | int, str | None] | None = None,
version: int | None = None,
_shape: Sequence[int] | None = None,
_axes: Sequence[str] | None = None,
_index: Sequence[int] | None = None,
_append: bool = False,
_close: bool = True,
) -> None:
"""Write fsspec ReferenceFileSystem as JSON to file.
Parameters:
jsonfile:
Name or open file handle of output JSON file.
url:
Remote location of TIFF file(s) without file name(s).
groupname:
Zarr group name.
templatename:
Version 1 URL template name. The default is 'u'.
compressors:
Mapping of :py:class:`COMPRESSION` codes to Numcodecs codec
names.
version:
Version of fsspec file to write. The default is 0.
_shape:
Shape of file sequence (experimental).
_axes:
Axes of file sequence (experimental).
_index:
Index of file in sequence (experimental).
_append:
If *True*, only write index keys and values (experimental).
_close:
If *True*, no more appends (experimental).
Raises:
ValueError:
ZarrTiffStore cannot be represented as ReferenceFileSystem
due to features that are not supported by Zarr, Numcodecs,
or Imagecodecs:
- compressors, such as CCITT
- filters, such as bitorder reversal, packed integers
- dtypes, such as float24, complex integers
- JPEGTables in multi-page series
- incomplete chunks, such as `imagelength % rowsperstrip != 0`
Files containing incomplete tiles may fail at runtime.
Notes:
Parameters `_shape`, `_axes`, `_index`, `_append`, and `_close`
are an experimental API for joining the ReferenceFileSystems of
multiple files of a TiffSequence.
References:
- `fsspec ReferenceFileSystem format
<https://github.com/fsspec/kerchunk>`_
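Examples:
    A minimal sketch, assuming 'temp.tif' uses a compression scheme
    supported by the ReferenceFileSystem and is served from the given
    URL (file name and URL are illustrative)::

        import tifffile

        store = tifffile.imread('temp.tif', aszarr=True)
        store.write_fsspec('temp.json', url='https://server.example/data')
        store.close()

    The resulting JSON file can then be opened as a
    ReferenceFileSystem with the fsspec or kerchunk packages.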
"""
compressors = {
1: None,
8: 'zlib',
32946: 'zlib',
34925: 'lzma',
50013: 'zlib', # pixtiff
5: 'imagecodecs_lzw',
7: 'imagecodecs_jpeg',
22610: 'imagecodecs_jpegxr',
32773: 'imagecodecs_packbits',
33003: 'imagecodecs_jpeg2k',
33004: 'imagecodecs_jpeg2k',
33005: 'imagecodecs_jpeg2k',
33007: 'imagecodecs_jpeg',
34712: 'imagecodecs_jpeg2k',
34887: 'imagecodecs_lerc',
34892: 'imagecodecs_jpeg',
34933: 'imagecodecs_png',
34934: 'imagecodecs_jpegxr',
48124: 'imagecodecs_jetraw',
50000: 'imagecodecs_zstd', # numcodecs.zstd fails w/ unknown sizes
50001: 'imagecodecs_webp',
50002: 'imagecodecs_jpegxl',
52546: 'imagecodecs_jpegxl',
**({} if compressors is None else compressors),
}
for series in self._data:
errormsg = ' not supported by the fsspec ReferenceFileSystem'
keyframe = series.keyframe
if (
keyframe.compression in {65000, 65001, 65002}
and keyframe.parent.is_eer
):
compressors[keyframe.compression] = 'imagecodecs_eer'
if keyframe.compression not in compressors:
raise ValueError(f'{keyframe.compression!r} is' + errormsg)
if keyframe.fillorder != 1:
raise ValueError(f'{keyframe.fillorder!r} is' + errormsg)
if keyframe.sampleformat not in {1, 2, 3, 6}:
# TODO: support float24 and cint via filters?
raise ValueError(f'{keyframe.sampleformat!r} is' + errormsg)
if (
keyframe.bitspersample
not in {
8,
16,
32,
64,
128,
}
and keyframe.compression
not in {
# JPEG
7,
33007,
34892,
}
and compressors[keyframe.compression] != 'imagecodecs_eer'
):
raise ValueError(
f'BitsPerSample {keyframe.bitspersample} is' + errormsg
)
if (
not self._chunkmode
and not keyframe.is_tiled
and keyframe.imagelength % keyframe.rowsperstrip
):
raise ValueError('incomplete chunks are' + errormsg)
if self._chunkmode and not keyframe.is_final:
raise ValueError(f'{self._chunkmode!r} is' + errormsg)
if keyframe.jpegtables is not None and len(series.pages) > 1:
raise ValueError(
'JPEGTables in multi-page files are' + errormsg
)
if url is None:
url = ''
elif url and url[-1] != '/':
url += '/'
url = url.replace('\\', '/')
if groupname is None:
groupname = ''
elif groupname and groupname[-1] != '/':
groupname += '/'
byteorder: ByteOrder | None = '<' if sys.byteorder == 'big' else '>'
if (
self._data[0].keyframe.parent.byteorder != byteorder
or self._data[0].keyframe.dtype is None
or self._data[0].keyframe.dtype.itemsize == 1
):
byteorder = None
index: str
_shape = [] if _shape is None else list(_shape)
_axes = [] if _axes is None else list(_axes)
if len(_shape) != len(_axes):
raise ValueError('len(_shape) != len(_axes)')
if _index is None:
index = ''
elif len(_shape) != len(_index):
raise ValueError('len(_shape) != len(_index)')
elif _index:
index = '.'.join(str(i) for i in _index)
index += '.'
refs: dict[str, Any] = {}
refzarr: dict[str, Any]
if version == 1:
if _append:
raise ValueError('cannot append to version 1')
if templatename is None:
templatename = 'u'
refs['version'] = 1
refs['templates'] = {}
refs['gen'] = []
templates = {}
if self._data[0].is_multifile:
i = 0
for page in self._data[0].pages:
if page is None or page.keyframe is None:
continue
fname = page.keyframe.parent.filehandle.name
if fname in templates:
continue
key = f'{templatename}{i}'
templates[fname] = f'{{{{{key}}}}}'
refs['templates'][key] = url + fname
i += 1
else:
fname = self._data[0].keyframe.parent.filehandle.name
key = f'{templatename}'
templates[fname] = f'{{{{{key}}}}}'
refs['templates'][key] = url + fname
refs['refs'] = refzarr = {}
else:
refzarr = refs
if not _append:
if groupname:
# TODO: support nested groups
refzarr['.zgroup'] = _json_dumps({'zarr_format': 2}).decode()
for key, value in self._store.items():
if '.zattrs' in key and _axes:
value = json.loads(value)
if '_ARRAY_DIMENSIONS' in value:
value['_ARRAY_DIMENSIONS'] = (
_axes + value['_ARRAY_DIMENSIONS']
)
value = _json_dumps(value)
elif '.zarray' in key:
level = int(key.split('/')[0]) if '/' in key else 0
keyframe = self._data[level].keyframe
value = json.loads(value)
if _shape:
value['shape'] = _shape + value['shape']
value['chunks'] = [1] * len(_shape) + value['chunks']
codec_id = compressors[keyframe.compression]
if codec_id == 'imagecodecs_jpeg':
# TODO: handle JPEG color spaces
jpegtables = keyframe.jpegtables
if jpegtables is None:
tables = None
else:
import base64
tables = base64.b64encode(jpegtables).decode()
jpegheader = keyframe.jpegheader
if jpegheader is None:
header = None
else:
import base64
header = base64.b64encode(jpegheader).decode()
(
colorspace_jpeg,
colorspace_data,
) = jpeg_decode_colorspace(
keyframe.photometric,
keyframe.planarconfig,
keyframe.extrasamples,
keyframe.is_jfif,
)
value['compressor'] = {
'id': codec_id,
'tables': tables,
'header': header,
'bitspersample': keyframe.bitspersample,
'colorspace_jpeg': colorspace_jpeg,
'colorspace_data': colorspace_data,
}
elif (
codec_id == 'imagecodecs_webp'
and keyframe.samplesperpixel == 4
):
value['compressor'] = {
'id': codec_id,
'hasalpha': True,
}
elif codec_id == 'imagecodecs_eer':
horzbits = vertbits = 2
if keyframe.compression == 65002:
skipbits = int(keyframe.tags.valueof(65007, 7))
horzbits = int(keyframe.tags.valueof(65008, 2))
vertbits = int(keyframe.tags.valueof(65009, 2))
elif keyframe.compression == 65001:
skipbits = 7
else:
skipbits = 8
value['compressor'] = {
'id': codec_id,
'shape': keyframe.chunks,
'skipbits': skipbits,
'horzbits': horzbits,
'vertbits': vertbits,
'superres': keyframe.parent._superres,
}
elif codec_id is not None:
value['compressor'] = {'id': codec_id}
if byteorder is not None:
value['dtype'] = byteorder + value['dtype'][1:]
if keyframe.predictor > 1:
# predictors need access to chunk shape and dtype
# requires imagecodecs > 2021.8.26 to read
if keyframe.predictor in {2, 34892, 34893}:
filter_id = 'imagecodecs_delta'
else:
filter_id = 'imagecodecs_floatpred'
if keyframe.predictor <= 3:
dist = 1
elif keyframe.predictor in {34892, 34894}:
dist = 2
else:
dist = 4
if (
keyframe.planarconfig == 1
and keyframe.samplesperpixel > 1
):
axis = -2
else:
axis = -1
value['filters'] = [
{
'id': filter_id,
'axis': axis,
'dist': dist,
'shape': value['chunks'],
'dtype': value['dtype'],
}
]
value = _json_dumps(value)
refzarr[groupname + key] = value.decode()
fh: TextIO
if hasattr(jsonfile, 'write'):
fh = jsonfile # type: ignore[assignment]
else:
fh = open(jsonfile, 'w', encoding='utf-8')
if version == 1:
fh.write(json.dumps(refs, indent=1).rsplit('}"', 1)[0] + '}"')
indent = ' '
elif _append:
indent = ' '
else:
fh.write(json.dumps(refs, indent=1)[:-2])
indent = ' '
offset: int | None
for key, value in self._store.items():
if '.zarray' in key:
value = json.loads(value)
shape = value['shape']
chunks = value['chunks']
levelstr = (key.split('/')[0] + '/') if '/' in key else ''
for chunkindex in _ndindex(shape, chunks):
key = levelstr + chunkindex
keyframe, page, _, offset, bytecount = self._parse_key(key)
if page and self._chunkmode and offset is None:
offset = page.dataoffsets[0]
bytecount = keyframe.nbytes
if offset and bytecount:
fname = keyframe.parent.filehandle.name
if version == 1:
fname = templates[fname]
else:
fname = f'{url}{fname}'
fh.write(
f',\n{indent}"{groupname}{key}": '
f'["{fname}", {offset}, {bytecount}]'
)
# TODO: support nested groups
if version == 1:
fh.write('\n }\n}')
elif _close:
fh.write('\n}')
if not hasattr(jsonfile, 'write'):
fh.close()
async def get(
self,
key: str,
prototype: BufferPrototype,
byte_range: ByteRequest | None = None,
) -> Buffer | None:
"""Return value associated with key."""
# print(f'get({key=}, {byte_range=})')
if byte_range is not None:
raise NotImplementedError(f'{byte_range=!r} not supported')
if key in self._store:
return prototype.buffer.from_bytes(self._store[key])
if (
key == 'zarr.json'
or key[-10:] == '.zmetadata'
or key[-7:] == '.zarray'
or key[-7:] == '.zgroup'
):
# catch '.zarray' and 'attribute/.zarray'
return None
keyframe, page, chunkindex, offset, bytecount = self._parse_key(key)
if page is None or offset == 0 or bytecount == 0:
return None
fh = page.parent.filehandle
if self._chunkmode:
if offset is not None:
# contiguous image data in page or series
# create virtual frame instead of loading page from file
assert bytecount is not None
page = TiffFrame(
page.parent,
index=0,
keyframe=keyframe,
dataoffsets=(offset,),
databytecounts=(bytecount,),
)
# TODO: use asyncio.to_thread ?
self._filecache.open(fh)
chunk = page.asarray(
lock=self._filecache.lock,
maxworkers=self._maxworkers,
buffersize=self._buffersize,
)
self._filecache.close(fh)
if self._transform is not None:
chunk = self._transform(chunk)
return prototype.buffer(chunk.reshape(-1).view('B'))
assert offset is not None and bytecount is not None
chunk_bytes = self._filecache.read(fh, offset, bytecount)
decodeargs: dict[str, Any] = {'_fullsize': True}
if page.jpegtables is not None:
decodeargs['jpegtables'] = page.jpegtables
if keyframe.jpegheader is not None:
decodeargs['jpegheader'] = keyframe.jpegheader
assert chunkindex is not None
keyframe.decode # cache decode function
if self._maxworkers > 1:
decoded = await asyncio.to_thread(
keyframe.decode, chunk_bytes, chunkindex, **decodeargs
)
else:
decoded = keyframe.decode(chunk_bytes, chunkindex, **decodeargs)
chunk = decoded[0] # type: ignore[assignment]
del decoded
assert chunk is not None
if self._transform is not None:
chunk = self._transform(chunk)
if self._chunkmode:
chunks = keyframe.shape # type: ignore[unreachable]
else:
chunks = keyframe.chunks
if chunk.size != product(chunks):
raise RuntimeError(f'{chunk.size} != {product(chunks)}')
return prototype.buffer(chunk.reshape(-1).view('B'))
async def exists(self, key: str) -> bool:
"""Return whether key exists in store."""
# print(f'exists({key=})')
if key in self._store:
return True
assert isinstance(key, str)
try:
_, page, _, offset, bytecount = self._parse_key(key)
except (KeyError, IndexError):
return False
if self._chunkmode and offset is None:
return True
return (
page is not None
and offset is not None
and bytecount is not None
and offset > 0
and bytecount > 0
)
async def set(self, key: str, value: Buffer) -> None:
"""Store (key, value) pair."""
if self._read_only:
raise PermissionError('ZarrTiffStore is read-only')
if (
key in self._store
or key == 'zarr.json'
or key[-10:] == '.zmetadata'
or key[-7:] == '.zarray'
or key[-7:] == '.zgroup'
):
# catch '.zarray' and 'attribute/.zarray'
return None
keyframe, page, chunkindex, offset, bytecount = self._parse_key(key)
if (
page is None
or offset is None
or offset == 0
or bytecount is None
or bytecount == 0
):
return
data = value.to_bytes()
if bytecount < len(data):
data = data[:bytecount]
self._filecache.write(page.parent.filehandle, offset, data)
def _parse_key(self, key: str, /) -> tuple[
TiffPage,
TiffPage | TiffFrame | None,
int | None,
int | None,
int | None,
]:
"""Return keyframe, page, index, offset, and bytecount from key.
Raise KeyError if key is not valid.
"""
if self._multiscales:
try:
level, key = key.split('/')
series = self._data[int(level)]
except (ValueError, IndexError) as exc:
raise KeyError(key) from exc
else:
series = self._data[0]
keyframe = series.keyframe
pageindex, chunkindex = self._indices(key, series)
if series.dataoffset is not None:
# contiguous or truncated
page = series[0]
if page is None or page.dtype is None or page.keyframe is None:
return keyframe, None, chunkindex, 0, 0
offset = pageindex * page.size * page.dtype.itemsize
try:
offset += page.dataoffsets[chunkindex]
except IndexError as exc:
raise KeyError(key) from exc
if self._chunkmode:
bytecount = page.size * page.dtype.itemsize
return page.keyframe, page, chunkindex, offset, bytecount
elif self._chunkmode:
with self._filecache.lock:
page = series[pageindex]
if page is None or page.keyframe is None:
return keyframe, None, None, 0, 0
return page.keyframe, page, None, None, None
else:
with self._filecache.lock:
page = series[pageindex]
if page is None or page.keyframe is None:
return keyframe, None, chunkindex, 0, 0
try:
offset = page.dataoffsets[chunkindex]
except IndexError:
# raise KeyError(key) from exc
# issue #249: Philips may be missing last row of tiles
return page.keyframe, page, chunkindex, 0, 0
try:
bytecount = page.databytecounts[chunkindex]
except IndexError as exc:
raise KeyError(key) from exc
return page.keyframe, page, chunkindex, offset, bytecount
def _indices(self, key: str, series: TiffPageSeries, /) -> tuple[int, int]:
"""Return page and strile indices from Zarr chunk index."""
keyframe = series.keyframe
shape = series.get_shape(self._squeeze)
try:
indices = [int(i) for i in key.split('.')]
except ValueError as exc:
raise KeyError(key) from exc
assert len(indices) == len(shape)
if self._chunkmode:
chunked = (1,) * len(keyframe.shape)
else:
chunked = keyframe.chunked
p = 1
for i, s in enumerate(shape[::-1]):
p *= s
if p == keyframe.size:
i = len(indices) - i - 1
frames_indices = indices[:i]
strile_indices = indices[i:]
frames_chunked = shape[:i]
strile_chunked = list(shape[i:]) # updated later
break
else:
raise RuntimeError
if len(strile_chunked) == len(keyframe.shape):
strile_chunked = list(chunked)
else:
# get strile_chunked including singleton dimensions
i = len(strile_indices) - 1
j = len(keyframe.shape) - 1
while True:
if strile_chunked[i] == keyframe.shape[j]:
strile_chunked[i] = chunked[j]
i -= 1
j -= 1
elif strile_chunked[i] == 1:
i -= 1
else:
raise RuntimeError('shape does not match page shape')
if i < 0 or j < 0:
break
assert product(strile_chunked) == product(chunked)
if len(frames_indices) > 0:
frameindex = int(
numpy.ravel_multi_index(frames_indices, frames_chunked)
)
else:
frameindex = 0
if len(strile_indices) > 0:
strileindex = int(
numpy.ravel_multi_index(strile_indices, strile_chunked)
)
else:
strileindex = 0
return frameindex, strileindex
class ZarrFileSequenceStore(ZarrStore):
"""Zarr 3 store interface to image array in FileSequence.
Parameters:
filesequence:
FileSequence instance to wrap as Zarr store.
Files in containers are not supported.
fillvalue:
Value to use for missing chunks. The default is 0.
chunkmode:
Currently only one chunk per file is supported.
chunkshape:
Shape of chunk in each file.
Must match ``FileSequence.imread(file, **imreadargs).shape``.
chunkdtype:
Data type of chunk in each file.
Must match ``FileSequence.imread(file, **imreadargs).dtype``.
axestiled:
Axes to be tiled. Map stacked sequence axis to chunk axis.
zattrs:
Additional attributes to store in `.zattrs`.
ioworkers:
If not 1, run the `imread` function asynchronously in a separate
thread. If enabled, internal threading of the `imread` function
should be disabled.
read_only:
Passed to :py:class:`zarr.abc.store.Store`.
imreadargs:
Arguments passed to :py:attr:`FileSequence.imread`.
**kwargs:
Arguments passed to :py:attr:`FileSequence.imread` in addition
to `imreadargs`.
Notes:
If `chunkshape` or `chunkdtype` are *None* (default), their values
are determined by reading the first file with
``FileSequence.imread(arg.files[0], **imreadargs)``.
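Examples:
    A minimal usage sketch, assuming 'temp_*.tif' matches existing
    single-page TIFF files of equal shape and dtype (the file name
    pattern is illustrative)::

        import zarr
        import tifffile

        with tifffile.TiffSequence('temp_*.tif') as tifs:
            store = ZarrFileSequenceStore(tifs)
            za = zarr.open(store, mode='r', zarr_format=2)
            image = za[:]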
"""
imread: Callable[..., NDArray[Any]]
"""Function to read image array from single file."""
_lookup: dict[tuple[int, ...], str]
_chunks: tuple[int, ...]
_dtype: numpy.dtype[Any]
_tiled: TiledSequence
_commonpath: str
_ioworkers: int
_kwargs: dict[str, Any]
def __init__(
self,
filesequence: FileSequence,
/,
*,
fillvalue: int | float | None = None,
chunkmode: CHUNKMODE | int | str | None = None,
chunkshape: Sequence[int] | None = None,
chunkdtype: DTypeLike | None = None,
axestiled: dict[int, int] | Sequence[tuple[int, int]] | None = None,
zattrs: dict[str, Any] | None = None,
ioworkers: int | None = 1,
imreadargs: dict[str, Any] | None = None,
read_only: bool = True,
**kwargs: Any,
) -> None:
super().__init__(
fillvalue=fillvalue, chunkmode=chunkmode, read_only=read_only
)
if self._chunkmode not in {0, 3}:
raise ValueError(f'invalid chunkmode {self._chunkmode!r}')
if not isinstance(filesequence, FileSequence):
raise TypeError('not a FileSequence')
if filesequence._container:
raise NotImplementedError('cannot open container as Zarr store')
# TODO: deprecate kwargs?
if imreadargs is not None:
kwargs |= imreadargs
self._ioworkers = 1 if ioworkers is None else ioworkers
self._kwargs = kwargs
self._imread = filesequence.imread
self._commonpath = filesequence.commonpath()
if chunkshape is None or chunkdtype is None:
chunk = filesequence.imread(filesequence[0], **kwargs)
self._chunks = chunk.shape
self._dtype = chunk.dtype
else:
self._chunks = tuple(chunkshape)
self._dtype = numpy.dtype(chunkdtype)
chunk = None
self._tiled = TiledSequence(
filesequence.shape, self._chunks, axestiled=axestiled
)
self._lookup = dict(
zip(self._tiled.indices(filesequence.indices), filesequence)
)
zattrs = {} if zattrs is None else dict(zattrs)
# TODO: add _ARRAY_DIMENSIONS to ZarrFileSequenceStore
# if '_ARRAY_DIMENSIONS' not in zattrs:
# zattrs['_ARRAY_DIMENSIONS'] = list(...)
self._store['.zattrs'] = _json_dumps(zattrs)
self._store['.zarray'] = _json_dumps(
{
'zarr_format': 2,
'shape': self._tiled.shape,
'chunks': self._tiled.chunks,
'dtype': _dtype_str(self._dtype),
'compressor': None,
'fill_value': _json_value(fillvalue, self._dtype),
'order': 'C',
'filters': None,
}
)
async def exists(self, key: str) -> bool:
"""Return whether key exists in store."""
# print(f'exists({key=})')
if key in self._store:
return True
assert isinstance(key, str)
try:
indices = tuple(int(i) for i in key.split('.'))
except Exception:
return False
return indices in self._lookup
async def get(
self,
key: str,
prototype: BufferPrototype,
byte_range: ByteRequest | None = None,
) -> Buffer | None:
"""Return value associated with key."""
if byte_range is not None:
raise NotImplementedError(f'{byte_range=!r} not supported')
if key in self._store:
return prototype.buffer.from_bytes(self._store[key])
if (
key == 'zarr.json'
or key[-10:] == '.zmetadata'
or key[-7:] == '.zarray'
or key[-7:] == '.zgroup'
):
# catch '.zarray' and 'attribute/.zarray'
return None
indices = tuple(int(i) for i in key.split('.'))
filename = self._lookup.get(indices, None)
if filename is None:
return None
if self._ioworkers != 1:
chunk = await asyncio.to_thread(
self._imread, filename, **self._kwargs
)
else:
chunk = self._imread(filename, **self._kwargs)
return prototype.buffer(chunk.reshape(-1).view('B'))
def write_fsspec(
self,
jsonfile: str | os.PathLike[Any] | TextIO,
/,
url: str | None,
*,
quote: bool | None = None,
groupname: str | None = None,
templatename: str | None = None,
codec_id: str | None = None,
version: int | None = None,
_append: bool = False,
_close: bool = True,
) -> None:
"""Write fsspec ReferenceFileSystem as JSON to file.
Parameters:
jsonfile:
Name or open file handle of output JSON file.
url:
Remote location of TIFF file(s) without file name(s).
quote:
Quote file names, that is, replace ' ' with '%20'.
The default is True.
groupname:
Zarr group name.
templatename:
Version 1 URL template name. The default is 'u'.
codec_id:
Name of Numcodecs codec to decode files or chunks.
version:
Version of fsspec file to write. The default is 0.
_append, _close:
Experimental API.
References:
- `fsspec ReferenceFileSystem format
<https://github.com/fsspec/kerchunk>`_
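Examples:
    A minimal sketch, assuming the sequence files are read with
    ``tifffile.imread`` so that `codec_id` defaults to 'tifffile'
    (file name pattern and URL are illustrative)::

        import tifffile

        with tifffile.TiffSequence('temp_*.tif') as tifs:
            store = ZarrFileSequenceStore(tifs)
            store.write_fsspec(
                'temp.json', url='https://server.example/data'
            )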
"""
from urllib.parse import quote as quote_
kwargs = self._kwargs.copy()
if codec_id is not None:
pass
elif self._imread is imread:
codec_id = 'tifffile'
elif 'imagecodecs' in self._imread.__module__:
if (
self._imread.__name__ != 'imread'
or 'codec' not in self._kwargs
):
raise ValueError('cannot determine codec_id')
codec = kwargs.pop('codec')
if isinstance(codec, (list, tuple)):
codec = codec[0]
if callable(codec):
codec = codec.__name__.split('_')[0]
codec_id = {
'apng': 'imagecodecs_apng',
'avif': 'imagecodecs_avif',
'gif': 'imagecodecs_gif',
'heif': 'imagecodecs_heif',
'jpeg': 'imagecodecs_jpeg',
'jpeg8': 'imagecodecs_jpeg',
'jpeg12': 'imagecodecs_jpeg',
'jpeg2k': 'imagecodecs_jpeg2k',
'jpegls': 'imagecodecs_jpegls',
'jpegxl': 'imagecodecs_jpegxl',
'jpegxr': 'imagecodecs_jpegxr',
'ljpeg': 'imagecodecs_ljpeg',
'lerc': 'imagecodecs_lerc',
# 'npy': 'imagecodecs_npy',
'png': 'imagecodecs_png',
'qoi': 'imagecodecs_qoi',
'tiff': 'imagecodecs_tiff',
'webp': 'imagecodecs_webp',
'zfp': 'imagecodecs_zfp',
}[codec]
else:
# TODO: choose codec from filename
raise ValueError('cannot determine codec_id')
if url is None:
url = ''
elif url and url[-1] != '/':
url += '/'
if groupname is None:
groupname = ''
elif groupname and groupname[-1] != '/':
groupname += '/'
refs: dict[str, Any] = {}
if version == 1:
if _append:
raise ValueError('cannot append to version 1 files')
if templatename is None:
templatename = 'u'
refs['version'] = 1
refs['templates'] = {templatename: url}
refs['gen'] = []
refs['refs'] = refzarr = {}
url = f'{{{{{templatename}}}}}'
else:
refzarr = refs
if groupname and not _append:
refzarr['.zgroup'] = _json_dumps({'zarr_format': 2}).decode()
for key, value in self._store.items():
if '.zarray' in key:
value = json.loads(value)
# TODO: make kwargs serializable
value['compressor'] = {'id': codec_id, **kwargs}
value = _json_dumps(value)
refzarr[groupname + key] = value.decode()
fh: TextIO
if hasattr(jsonfile, 'write'):
fh = jsonfile # type: ignore[assignment]
else:
fh = open(jsonfile, 'w', encoding='utf-8')
if version == 1:
fh.write(json.dumps(refs, indent=1).rsplit('}"', 1)[0] + '}"')
indent = ' '
elif _append:
fh.write(',\n')
fh.write(json.dumps(refs, indent=1)[2:-2])
indent = ' '
else:
fh.write(json.dumps(refs, indent=1)[:-2])
indent = ' '
prefix = len(self._commonpath)
for key, value in self._store.items():
if '.zarray' in key:
value = json.loads(value)
for index, filename in sorted(
self._lookup.items(), key=lambda x: x[0]
):
filename = filename[prefix:].replace('\\', '/')
if quote is None or quote:
filename = quote_(filename)
if filename[0] == '/':
filename = filename[1:]
indexstr = '.'.join(str(i) for i in index)
fh.write(
f',\n{indent}"{groupname}{indexstr}": '
f'["{url}{filename}"]'
)
if version == 1:
fh.write('\n }\n}')
elif _close:
fh.write('\n}')
if not hasattr(jsonfile, 'write'):
fh.close()
def zarr_selection(
store: ZarrStore,
selection: BasicSelection,
/,
*,
groupindex: str | None = None,
close: bool = True,
out: OutputType = None,
) -> NDArray[Any]:
"""Return selection from Zarr store.
Parameters:
store:
ZarrStore instance to read selection from.
selection:
Subset of image to be extracted and returned.
Refer to the Zarr documentation for valid selections.
groupindex:
Index of array if store is Zarr group.
close:
Close store before returning.
out:
Specifies how image array is returned.
By default, create a new array.
If a *numpy.ndarray*, a writable array to which the images
are copied.
If *'memmap'*, create a memory-mapped array in a temporary
file.
If a *string* or *open file*, the file used to create a
memory-mapped array.
"""
import zarr
from zarr.core.indexing import BasicIndexer
zarray: zarr.Array
z = zarr.open(store, mode='r', zarr_format=2)
try:
if isinstance(z, zarr.Group):
if groupindex is None:
groupindex = '0'
zarray = z[groupindex] # type: ignore[assignment]
else:
zarray = z
if out is not None:
shape = BasicIndexer(
selection,
shape=zarray.shape,
chunk_grid=RegularChunkGrid(chunk_shape=zarray.chunks),
).shape
ndbuffer = NDBuffer.from_numpy_array(
create_output(out, shape, zarray.dtype)
)
else:
ndbuffer = None
result = zarray.get_basic_selection(selection, out=ndbuffer)
del zarray
finally:
if close:
store.close()
return result # type: ignore[return-value]
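# Example: a minimal zarr_selection sketch, assuming 'temp.tif' is an
# existing 2D TIFF file (file name and selection are illustrative):
#
#   store = imread('temp.tif', aszarr=True)
#   region = zarr_selection(store, (slice(0, 256), slice(0, 256)))
#
# The store is closed before the selection is returned unless
# close=False is specified.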
def _empty_chunk(
shape: tuple[int, ...],
dtype: DTypeLike,
fillvalue: int | float | None,
/,
) -> NDArray[Any]:
"""Return empty chunk."""
if fillvalue is None or fillvalue == 0:
# return bytes(product(shape) * dtype.itemsize)
return numpy.zeros(shape, dtype)
chunk = numpy.empty(shape, dtype)
chunk[:] = fillvalue
return chunk # .tobytes()
def _dtype_str(dtype: numpy.dtype[Any], /) -> str:
"""Return dtype as string with native byte order."""
if dtype.itemsize == 1:
byteorder = '|'
else:
byteorder = {'big': '>', 'little': '<'}[sys.byteorder]
return byteorder + dtype.str[1:]
def _json_dumps(obj: Any, /) -> bytes:
"""Serialize object to JSON formatted string."""
return json.dumps(
obj,
indent=1,
sort_keys=True,
ensure_ascii=True,
separators=(',', ': '),
).encode('ascii')
def _json_value(value: Any, dtype: numpy.dtype[Any], /) -> Any:
"""Return value which is serializable to JSON."""
if value is None:
return value
if dtype.kind == 'b':
return bool(value)
if dtype.kind in 'ui':
return int(value)
if dtype.kind == 'f':
if numpy.isnan(value):
return 'NaN'
if numpy.isposinf(value):
return 'Infinity'
if numpy.isneginf(value):
return '-Infinity'
return float(value)
if dtype.kind == 'c':
value = numpy.array(value, dtype)
return (
_json_value(value.real, dtype.type().real.dtype),
_json_value(value.imag, dtype.type().imag.dtype),
)
return value
def _ndindex(
shape: tuple[int, ...], chunks: tuple[int, ...], /
) -> Iterator[str]:
"""Return iterator over all chunk index strings."""
assert len(shape) == len(chunks)
chunked = tuple(
i // j + (1 if i % j else 0) for i, j in zip(shape, chunks)
)
for indices in numpy.ndindex(chunked):
yield '.'.join(str(index) for index in indices)
def _is_writable(keyframe: TiffPage) -> bool:
"""Return True if chunks are writable."""
return (
keyframe.compression == 1
and keyframe.fillorder == 1
and keyframe.sampleformat in {1, 2, 3, 6}
and keyframe.bitspersample in {8, 16, 32, 64, 128}
# and (
# keyframe.rowsperstrip == 0
# or keyframe.imagelength % keyframe.rowsperstrip == 0
# )
)
def _chunks(
chunks: tuple[int, ...],
shape: tuple[int, ...],
shaped: tuple[int, int, int, int, int],
/,
) -> tuple[int, ...]:
"""Return chunks with same length as shape."""
ndim = len(shape)
if ndim == 0:
return () # empty array
if 0 in shape:
return (1,) * ndim
d = 0 if shaped[1] == 1 else 1
i = min(ndim, 3 + d)
n = len(chunks)
if (
n == 2 + d
and i != 2 + d
and shape[-1] == 1
and shape[-i:] == shaped[-i:]
):
# planarconfig=contig with one sample
chunks = chunks + (1,)
if ndim < len(chunks):
# remove leading dimensions of size 1 from chunks
for i, size in enumerate(chunks):
if size > 1:
break
chunks = chunks[i:]
if ndim < len(chunks):
raise ValueError(f'{shape=!r} is shorter than {chunks=!r}')
# prepend size 1 dimensions to chunks to match length of shape
return tuple([1] * (ndim - len(chunks)) + list(chunks))