# tifffile/zarr.py

# Copyright (c) 2008-2025, Christoph Gohlke
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

"""Zarr 3 TIFF and file sequence stores."""

from __future__ import annotations

__all__ = ['ZarrStore', 'ZarrTiffStore', 'ZarrFileSequenceStore']

import asyncio
import json
import os
import sys
import threading
from typing import TYPE_CHECKING

import numpy
import zarr

try:
    from zarr.abc.store import ByteRequest, Store
    from zarr.core.buffer.cpu import NDBuffer
    from zarr.core.chunk_grids import RegularChunkGrid
except ImportError as exc:
    raise ValueError(f'zarr {zarr.__version__} < 3 is not supported') from exc

from .tifffile import (
    CHUNKMODE,
    COMPRESSION,
    FileCache,
    FileSequence,
    NullContext,
    TiffFrame,
    TiffPage,
    TiffPageSeries,
    TiledSequence,
    create_output,
    enumarg,
    imread,
    jpeg_decode_colorspace,
    product,
)

if TYPE_CHECKING:
    from collections.abc import (
        AsyncIterator,
        Callable,
        Iterable,
        Iterator,
        Sequence,
    )
    from typing import Any, TextIO

    from numpy.typing import DTypeLike, NDArray
    from zarr.core.buffer import Buffer, BufferPrototype
    from zarr.core.indexing import BasicSelection

    from .tifffile import ByteOrder, OutputType


class ZarrStore(Store):
    """Zarr 3 store base class.

    Parameters:
        fillvalue:
            Value to use for missing chunks of Zarr store.
            The default is 0.
        chunkmode:
            Specifies how to chunk data.
        read_only:
            Passed to :py:class:`zarr.abc.store.Store`.

    References:
        1. https://zarr.readthedocs.io/en/stable/api/zarr/abc/store/
        2. https://zarr.readthedocs.io/en/stable/spec/v2.html
        3. https://forum.image.sc/t/multiscale-arrays-v0-1/37930

    """

    _read_only: bool
    _store: dict[str, Any]
    _fillvalue: int | float
    _chunkmode: int

    def __init__(
        self,
        /,
        *,
        fillvalue: int | float | None = None,
        chunkmode: CHUNKMODE | int | str | None = None,
        read_only: bool = True,
    ) -> None:
        super().__init__(read_only=read_only)
        self._store = {}
        self._fillvalue = 0 if fillvalue is None else fillvalue
        if chunkmode is None:
            self._chunkmode = CHUNKMODE(0)
        else:
            self._chunkmode = enumarg(CHUNKMODE, chunkmode)

    def __eq__(self, other: object) -> bool:
        """Return whether objects are equal."""
        return (
            isinstance(other, type(self))
            and self._store == other._store
            and self._fillvalue == other._fillvalue
            and self._chunkmode == other._chunkmode
        )

    async def get_partial_values(
        self,
        prototype: BufferPrototype,
        key_ranges: Iterable[tuple[str, ByteRequest | None]],
    ) -> list[Buffer | None]:
        """Return possibly partial values from given key_ranges."""
        # print(f'get_partial_values({key_ranges=})')
        return [
            await self.get(key, prototype, byte_range)
            for key, byte_range in key_ranges
        ]

    @property
    def supports_writes(self) -> bool:
        """Store supports writes."""
        return not self._read_only

    def _set(self, key: str, value: Buffer, /) -> None:
        """Store (key, value) pair."""
        raise NotImplementedError

    async def set(self, key: str, value: Buffer) -> None:
        """Store (key, value) pair."""
        self._set(key, value)

    @property
    def supports_deletes(self) -> bool:
        """Store supports deletes."""
        return False

    async def delete(self, key: str) -> None:
        """Remove key from store."""
        raise PermissionError('ZarrStore does not support deletes')

    @property
    def supports_listing(self) -> bool:
        """Store supports listing."""
        return True

    async def list(self) -> AsyncIterator[str]:
        """Return all keys in store."""
        for key in self._store:
            yield key

    async def list_prefix(self, prefix: str) -> AsyncIterator[str]:
        """Return all keys in store that begin with prefix.

        Keys are returned relative to the root of the store.

        """
        for key in list(self._store):
            if key.startswith(prefix):
                yield key

    async def list_dir(self, prefix: str) -> AsyncIterator[str]:
        """Return all keys and prefixes with prefix.

        Keys and prefixes do not contain the character "/" after the
        given prefix.

        """
        prefix = prefix.rstrip('/')
        if prefix == '':
            keys_unique = {k.split('/')[0] for k in self._store}
        else:
            keys_unique = {
                key.removeprefix(prefix + '/').split('/')[0]
                for key in self._store
                if key.startswith(prefix + '/') and key != prefix
            }
        for key in keys_unique:
            yield key

    @property
    def is_multiscales(self) -> bool:
        """Return whether ZarrStore contains multiscales."""
        return b'multiscales' in self._store['.zattrs']

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}'

    # async def _get_many(
    #     self,
    #     requests: Iterable[tuple[str, BufferPrototype, ByteRequest | None]]
    # ) -> AsyncGenerator[tuple[str, Buffer | None], None]:
    #     print(f'_get_many({requests=})')
    #     return super()._get_many(requests)

    # async def getsize_prefix(self, prefix: str) -> int:
    #     print(f'getsize_prefix({prefix=})')
    #     return super().getsize_prefix(prefix)


class ZarrTiffStore(ZarrStore):
    """Zarr 3 store interface to image array in TiffPage or TiffPageSeries.

    ZarrTiffStore uses a TiffFile instance for reading and decoding chunks.
    Therefore, ZarrTiffStore instances cannot be pickled.

    For writing, image data must be stored in uncompressed, unpredicted,
    and unpacked form. Sparse strips and tiles are not written.

    Parameters:
        arg:
            TIFF page or series to wrap as Zarr store.
        level:
            Pyramidal level to wrap. The default is 0.
        chunkmode:
            Use strips or tiles (0) or whole page data (2) as chunks.
            The default is 0.
        fillvalue:
            Value to use for missing chunks. The default is 0.
        zattrs:
            Additional attributes to store in `.zattrs`.
        multiscales:
            Create a multiscales-compatible Zarr group store.
            By default, create a Zarr array store for pages and
            non-pyramidal series.
        lock:
            Reentrant lock to synchronize seeks and reads from file.
            By default, the lock of the parent's file handle is used.
        squeeze:
            Remove length-1 dimensions from shape of TiffPageSeries.
        maxworkers:
            If `chunkmode=0`, asynchronously run chunk decode function in
            separate thread if greater than 1.
            If `chunkmode=2`, maximum number of threads to concurrently
            decode strips or tiles.
            If *None* or *0*, use up to :py:attr:`_TIFF.MAXWORKERS` or
            asyncio assigned threads.
        buffersize:
            Approximate number of bytes to read from file in one pass
            if `chunkmode=2`. The default is :py:attr:`_TIFF.BUFFERSIZE`.
        read_only:
            Passed to :py:class:`zarr.abc.store.Store`.
        _openfiles:
            Internal API.

    """

    _data: list[TiffPageSeries]
    _filecache: FileCache
    _transform: Callable[[NDArray[Any]], NDArray[Any]] | None
    _maxworkers: int
    _buffersize: int | None
    _squeeze: bool | None
    _multiscales: bool

    def __init__(
        self,
        arg: TiffPage | TiffFrame | TiffPageSeries,
        /,
        *,
        level: int | None = None,
        chunkmode: CHUNKMODE | int | str | None = None,
        fillvalue: int | float | None = None,
        zattrs: dict[str, Any] | None = None,
        multiscales: bool | None = None,
        lock: threading.RLock | NullContext | None = None,
        squeeze: bool | None = None,
        maxworkers: int | None = None,
        buffersize: int | None = None,
        read_only: bool | None = None,
        _openfiles: int | None = None,
        **kwargs: Any,
    ) -> None:
        if chunkmode is None:
            self._chunkmode = CHUNKMODE(0)
        else:
            self._chunkmode = enumarg(CHUNKMODE, chunkmode)
        if self._chunkmode not in {0, 2}:
            raise NotImplementedError(f'{self._chunkmode!r} not implemented')

        self._squeeze = None if squeeze is None else bool(squeeze)
        self._buffersize = buffersize

        if isinstance(arg, TiffPageSeries):
            self._data = arg.levels
            self._transform = arg.transform
            if multiscales is not None and not multiscales:
                level = 0
            if level is not None:
                self._data = [self._data[level]]
            name = arg.name
        else:
            self._data = [TiffPageSeries([arg])]
            self._transform = None
            name = 'Unnamed'

        if not maxworkers:
            maxworkers = self._data[0].keyframe.maxworkers
            if maxworkers < 3 and self._chunkmode == 0:
                maxworkers = 1
        self._maxworkers = maxworkers

        fh = self._data[0].keyframe.parent._parent.filehandle
        if read_only is None:
            read_only = not fh.writable() or self._chunkmode != 0

        super().__init__(
            fillvalue=fillvalue, chunkmode=self._chunkmode, read_only=read_only
        )

        if lock is None:
            fh.set_lock(True)
            lock = fh.lock
        self._filecache = FileCache(size=_openfiles, lock=lock)

        zattrs = {} if zattrs is None else dict(zattrs)
        # TODO: Zarr Encoding Specification
        # https://xarray.pydata.org/en/stable/internals/zarr-encoding-spec.html

        if multiscales or len(self._data) > 1:
            # multiscales
            self._multiscales = True
            if '_ARRAY_DIMENSIONS' in zattrs:
                array_dimensions = zattrs.pop('_ARRAY_DIMENSIONS')
            else:
                array_dimensions = list(self._data[0].get_axes(squeeze))
            self._store['.zgroup'] = _json_dumps({'zarr_format': 2})
            self._store['.zattrs'] = _json_dumps(
                {
                    # TODO: use https://ngff.openmicroscopy.org/latest/
                    'multiscales': [
                        {
                            'version': '0.1',
                            'name': name,
                            'datasets': [
                                {'path': str(i)}
                                for i in range(len(self._data))
                            ],
                            # 'axes': [...]
                            # 'type': 'unknown',
                            'metadata': {},
                        }
                    ],
                    **zattrs,
                }
            )
            shape0 = self._data[0].get_shape(squeeze)
            for level, series in enumerate(self._data):
                keyframe = series.keyframe
                keyframe.decode  # cache decode function
                shape = series.get_shape(squeeze)
                dtype = series.dtype
                if fillvalue is None:
                    self._fillvalue = fillvalue = keyframe.nodata
                if self._chunkmode:
                    chunks = keyframe.shape
                else:
                    chunks = keyframe.chunks
                self._store[f'{level}/.zattrs'] = _json_dumps(
                    {
                        '_ARRAY_DIMENSIONS': [
                            (f'{ax}{level}' if i != j else ax)
                            for ax, i, j in zip(
                                array_dimensions, shape, shape0
                            )
                        ]
                    }
                )
                self._store[f'{level}/.zarray'] = _json_dumps(
                    {
                        'zarr_format': 2,
                        'shape': shape,
                        'chunks': _chunks(chunks, shape, keyframe.shaped),
                        'dtype': _dtype_str(dtype),
                        'compressor': None,
                        'fill_value': _json_value(fillvalue, dtype),
                        'order': 'C',
                        'filters': None,
                    }
                )
                if not self._read_only:
                    self._read_only = not _is_writable(keyframe)
        else:
            self._multiscales = False
            series = self._data[0]
            keyframe = series.keyframe
            keyframe.decode  # cache decode function
            shape = series.get_shape(squeeze)
            dtype = series.dtype
            if fillvalue is None:
                self._fillvalue = fillvalue = keyframe.nodata
            if self._chunkmode:
                chunks = keyframe.shape
            else:
                chunks = keyframe.chunks
            if '_ARRAY_DIMENSIONS' not in zattrs:
                zattrs['_ARRAY_DIMENSIONS'] = list(series.get_axes(squeeze))
            self._store['.zattrs'] = _json_dumps(zattrs)
            self._store['.zarray'] = _json_dumps(
                {
                    'zarr_format': 2,
                    'shape': shape,
                    'chunks': _chunks(chunks, shape, keyframe.shaped),
                    'dtype': _dtype_str(dtype),
                    'compressor': None,
                    'fill_value': _json_value(fillvalue, dtype),
                    'order': 'C',
                    'filters': None,
                }
            )
            if not self._read_only:
                self._read_only = not _is_writable(keyframe)

    def close(self) -> None:
        """Close store."""
        super().close()
        self._filecache.clear()

    def write_fsspec(
        self,
        jsonfile: str | os.PathLike[Any] | TextIO,
        /,
        url: str | None,
        *,
        groupname: str | None = None,
        templatename: str | None = None,
        compressors: dict[COMPRESSION | int, str | None] | None = None,
        version: int | None = None,
        _shape: Sequence[int] | None = None,
        _axes: Sequence[str] | None = None,
        _index: Sequence[int] | None = None,
        _append: bool = False,
        _close: bool = True,
    ) -> None:
        """Write fsspec ReferenceFileSystem as JSON to file.

        Parameters:
            jsonfile:
                Name or open file handle of output JSON file.
            url:
                Remote location of TIFF file(s) without file name(s).
            groupname:
                Zarr group name.
            templatename:
                Version 1 URL template name. The default is 'u'.
            compressors:
                Mapping of :py:class:`COMPRESSION` codes to Numcodecs
                codec names.
            version:
                Version of fsspec file to write. The default is 0.
            _shape:
                Shape of file sequence (experimental).
            _axes:
                Axes of file sequence (experimental).
            _index:
                Index of file in sequence (experimental).
            _append:
                If *True*, only write index keys and values (experimental).
            _close:
                If *True*, no more appends (experimental).

        Raises:
            ValueError:
                ZarrTiffStore cannot be represented as ReferenceFileSystem
                due to features that are not supported by Zarr, Numcodecs,
                or Imagecodecs:

                - compressors, such as CCITT
                - filters, such as bitorder reversal, packed integers
                - dtypes, such as float24, complex integers
                - JPEGTables in multi-page series
                - incomplete chunks, such as
                  `imagelength % rowsperstrip != 0`

                Files containing incomplete tiles may fail at runtime.

        Notes:
            Parameters `_shape`, `_axes`, `_index`, `_append`, and `_close`
            are an experimental API for joining the ReferenceFileSystems of
            multiple files of a TiffSequence.

        References:
            - `fsspec ReferenceFileSystem format`_

        """
        compressors = {
            1: None,
            8: 'zlib',
            32946: 'zlib',
            34925: 'lzma',
            50013: 'zlib',  # pixtiff
            5: 'imagecodecs_lzw',
            7: 'imagecodecs_jpeg',
            22610: 'imagecodecs_jpegxr',
            32773: 'imagecodecs_packbits',
            33003: 'imagecodecs_jpeg2k',
            33004: 'imagecodecs_jpeg2k',
            33005: 'imagecodecs_jpeg2k',
            33007: 'imagecodecs_jpeg',
            34712: 'imagecodecs_jpeg2k',
            34887: 'imagecodecs_lerc',
            34892: 'imagecodecs_jpeg',
            34933: 'imagecodecs_png',
            34934: 'imagecodecs_jpegxr',
            48124: 'imagecodecs_jetraw',
            50000: 'imagecodecs_zstd',  # numcodecs.zstd fails w/ unknown sizes
            50001: 'imagecodecs_webp',
            50002: 'imagecodecs_jpegxl',
            52546: 'imagecodecs_jpegxl',
            **({} if compressors is None else compressors),
        }

        for series in self._data:
            errormsg = ' not supported by the fsspec ReferenceFileSystem'
            keyframe = series.keyframe
            if (
                keyframe.compression in {65000, 65001, 65002}
                and keyframe.parent.is_eer
            ):
                compressors[keyframe.compression] = 'imagecodecs_eer'
            if keyframe.compression not in compressors:
                raise ValueError(f'{keyframe.compression!r} is' + errormsg)
            if keyframe.fillorder != 1:
                raise ValueError(f'{keyframe.fillorder!r} is' + errormsg)
            if keyframe.sampleformat not in {1, 2, 3, 6}:
                # TODO: support float24 and cint via filters?
                raise ValueError(f'{keyframe.sampleformat!r} is' + errormsg)
            if (
                keyframe.bitspersample not in {8, 16, 32, 64, 128}
                and keyframe.compression not in {7, 33007, 34892}  # JPEG
                and compressors[keyframe.compression] != 'imagecodecs_eer'
            ):
                raise ValueError(
                    f'BitsPerSample {keyframe.bitspersample} is' + errormsg
                )
            if (
                not self._chunkmode
                and not keyframe.is_tiled
                and keyframe.imagelength % keyframe.rowsperstrip
            ):
                raise ValueError('incomplete chunks are' + errormsg)
            if self._chunkmode and not keyframe.is_final:
                raise ValueError(f'{self._chunkmode!r} is' + errormsg)
            if keyframe.jpegtables is not None and len(series.pages) > 1:
                raise ValueError(
                    'JPEGTables in multi-page files are' + errormsg
                )

        if url is None:
            url = ''
        elif url and url[-1] != '/':
            url += '/'
        url = url.replace('\\', '/')

        if groupname is None:
            groupname = ''
        elif groupname and groupname[-1] != '/':
            groupname += '/'

        byteorder: ByteOrder | None = '<' if sys.byteorder == 'big' else '>'
        if (
            self._data[0].keyframe.parent.byteorder != byteorder
            or self._data[0].keyframe.dtype is None
            or self._data[0].keyframe.dtype.itemsize == 1
        ):
            byteorder = None

        index: str
        _shape = [] if _shape is None else list(_shape)
        _axes = [] if _axes is None else list(_axes)
        if len(_shape) != len(_axes):
            raise ValueError('len(_shape) != len(_axes)')
        if _index is None:
            index = ''
        elif len(_shape) != len(_index):
            raise ValueError('len(_shape) != len(_index)')
        elif _index:
            index = '.'.join(str(i) for i in _index)
            index += '.'

        refs: dict[str, Any] = {}
        refzarr: dict[str, Any]
        if version == 1:
            if _append:
                raise ValueError('cannot append to version 1')
            if templatename is None:
                templatename = 'u'
            refs['version'] = 1
            refs['templates'] = {}
            refs['gen'] = []
            templates = {}
            if self._data[0].is_multifile:
                i = 0
                for page in self._data[0].pages:
                    if page is None or page.keyframe is None:
                        continue
                    fname = page.keyframe.parent.filehandle.name
                    if fname in templates:
                        continue
                    key = f'{templatename}{i}'
                    templates[fname] = f'{{{{{key}}}}}'
                    refs['templates'][key] = url + fname
                    i += 1
            else:
                fname = self._data[0].keyframe.parent.filehandle.name
                key = f'{templatename}'
                templates[fname] = f'{{{{{key}}}}}'
                refs['templates'][key] = url + fname
            refs['refs'] = refzarr = {}
        else:
            refzarr = refs

        if not _append:
            if groupname:
                # TODO: support nested groups
                refzarr['.zgroup'] = _json_dumps({'zarr_format': 2}).decode()

            for key, value in self._store.items():
                if '.zattrs' in key and _axes:
                    value = json.loads(value)
                    if '_ARRAY_DIMENSIONS' in value:
                        value['_ARRAY_DIMENSIONS'] = (
                            _axes + value['_ARRAY_DIMENSIONS']
                        )
                    value = _json_dumps(value)
                elif '.zarray' in key:
                    level = int(key.split('/')[0]) if '/' in key else 0
                    keyframe = self._data[level].keyframe
                    value = json.loads(value)
                    if _shape:
                        value['shape'] = _shape + value['shape']
                        value['chunks'] = [1] * len(_shape) + value['chunks']
                    codec_id = compressors[keyframe.compression]
                    if codec_id == 'imagecodecs_jpeg':
                        # TODO: handle JPEG color spaces
                        jpegtables = keyframe.jpegtables
                        if jpegtables is None:
                            tables = None
                        else:
                            import base64

                            tables = base64.b64encode(jpegtables).decode()
                        jpegheader = keyframe.jpegheader
                        if jpegheader is None:
                            header = None
                        else:
                            import base64

                            header = base64.b64encode(jpegheader).decode()
                        (
                            colorspace_jpeg,
                            colorspace_data,
                        ) = jpeg_decode_colorspace(
                            keyframe.photometric,
                            keyframe.planarconfig,
                            keyframe.extrasamples,
                            keyframe.is_jfif,
                        )
                        value['compressor'] = {
                            'id': codec_id,
                            'tables': tables,
                            'header': header,
                            'bitspersample': keyframe.bitspersample,
                            'colorspace_jpeg': colorspace_jpeg,
                            'colorspace_data': colorspace_data,
                        }
                    elif (
                        codec_id == 'imagecodecs_webp'
                        and keyframe.samplesperpixel == 4
                    ):
                        value['compressor'] = {
                            'id': codec_id,
                            'hasalpha': True,
                        }
                    elif codec_id == 'imagecodecs_eer':
                        horzbits = vertbits = 2
                        if keyframe.compression == 65002:
                            skipbits = int(keyframe.tags.valueof(65007, 7))
                            horzbits = int(keyframe.tags.valueof(65008, 2))
                            vertbits = int(keyframe.tags.valueof(65009, 2))
                        elif keyframe.compression == 65001:
                            skipbits = 7
                        else:
                            skipbits = 8
                        value['compressor'] = {
                            'id': codec_id,
                            'shape': keyframe.chunks,
                            'skipbits': skipbits,
                            'horzbits': horzbits,
                            'vertbits': vertbits,
                            'superres': keyframe.parent._superres,
                        }
                    elif codec_id is not None:
                        value['compressor'] = {'id': codec_id}
                    if byteorder is not None:
                        value['dtype'] = byteorder + value['dtype'][1:]
                    if keyframe.predictor > 1:
                        # predictors need access to chunk shape and dtype
                        # requires imagecodecs > 2021.8.26 to read
                        if keyframe.predictor in {2, 34892, 34893}:
                            filter_id = 'imagecodecs_delta'
                        else:
                            filter_id = 'imagecodecs_floatpred'
                        if keyframe.predictor <= 3:
                            dist = 1
                        elif keyframe.predictor in {34892, 34894}:
                            dist = 2
                        else:
                            dist = 4
                        if (
                            keyframe.planarconfig == 1
                            and keyframe.samplesperpixel > 1
                        ):
                            axis = -2
                        else:
                            axis = -1
                        value['filters'] = [
                            {
                                'id': filter_id,
                                'axis': axis,
                                'dist': dist,
                                'shape': value['chunks'],
                                'dtype': value['dtype'],
                            }
                        ]
                    value = _json_dumps(value)
                refzarr[groupname + key] = value.decode()

        fh: TextIO
        if hasattr(jsonfile, 'write'):
            fh = jsonfile  # type: ignore[assignment]
        else:
            fh = open(jsonfile, 'w', encoding='utf-8')

        if version == 1:
            fh.write(json.dumps(refs, indent=1).rsplit('}"', 1)[0] + '}"')
            indent = '  '
        elif _append:
            indent = ' '
        else:
            fh.write(json.dumps(refs, indent=1)[:-2])
            indent = ' '

        offset: int | None
        for key, value in self._store.items():
            if '.zarray' in key:
                value = json.loads(value)
                shape = value['shape']
                chunks = value['chunks']
                levelstr = (key.split('/')[0] + '/') if '/' in key else ''
                for chunkindex in _ndindex(shape, chunks):
                    key = levelstr + chunkindex
                    keyframe, page, _, offset, bytecount = self._parse_key(
                        key
                    )
                    if page and self._chunkmode and offset is None:
                        offset = page.dataoffsets[0]
                        bytecount = keyframe.nbytes
                    if offset and bytecount:
                        fname = keyframe.parent.filehandle.name
                        if version == 1:
                            fname = templates[fname]
                        else:
                            fname = f'{url}{fname}'
                        fh.write(
                            f',\n{indent}"{groupname}{key}": '
                            f'["{fname}", {offset}, {bytecount}]'
                        )

        # TODO: support nested groups
        if version == 1:
            fh.write('\n }\n}')
        elif _close:
            fh.write('\n}')
        if not hasattr(jsonfile, 'write'):
            fh.close()

    async def get(
        self,
        key: str,
        prototype: BufferPrototype,
        byte_range: ByteRequest | None = None,
    ) -> Buffer | None:
        """Return value associated with key."""
        # print(f'get({key=}, {byte_range=})')
        if byte_range is not None:
            raise NotImplementedError(f'{byte_range=!r} not supported')
        if key in self._store:
            return prototype.buffer.from_bytes(self._store[key])
        if (
            key == 'zarr.json'
            or key[-10:] == '.zmetadata'
            or key[-7:] == '.zarray'
            or key[-7:] == '.zgroup'
        ):
            # catch '.zarray' and 'attribute/.zarray'
            return None
        keyframe, page, chunkindex, offset, bytecount = self._parse_key(key)
        if page is None or offset == 0 or bytecount == 0:
            return None
        fh = page.parent.filehandle
        if self._chunkmode:
            if offset is not None:
                # contiguous image data in page or series
                # create virtual frame instead of loading page from file
                assert bytecount is not None
                page = TiffFrame(
                    page.parent,
                    index=0,
                    keyframe=keyframe,
                    dataoffsets=(offset,),
                    databytecounts=(bytecount,),
                )
            # TODO: use asyncio.to_thread ?
            self._filecache.open(fh)
            chunk = page.asarray(
                lock=self._filecache.lock,
                maxworkers=self._maxworkers,
                buffersize=self._buffersize,
            )
            self._filecache.close(fh)
            if self._transform is not None:
                chunk = self._transform(chunk)
            return prototype.buffer(chunk.reshape(-1).view('B'))

        assert offset is not None and bytecount is not None
        chunk_bytes = self._filecache.read(fh, offset, bytecount)

        decodeargs: dict[str, Any] = {'_fullsize': True}
        if page.jpegtables is not None:
            decodeargs['jpegtables'] = page.jpegtables
        if keyframe.jpegheader is not None:
            decodeargs['jpegheader'] = keyframe.jpegheader

        assert chunkindex is not None
        keyframe.decode  # cache decode function
        if self._maxworkers > 1:
            decoded = await asyncio.to_thread(
                keyframe.decode, chunk_bytes, chunkindex, **decodeargs
            )
        else:
            decoded = keyframe.decode(chunk_bytes, chunkindex, **decodeargs)
        chunk = decoded[0]  # type: ignore[assignment]
        del decoded
        assert chunk is not None

        if self._transform is not None:
            chunk = self._transform(chunk)

        if self._chunkmode:
            chunks = keyframe.shape  # type: ignore[unreachable]
        else:
            chunks = keyframe.chunks
        if chunk.size != product(chunks):
            raise RuntimeError(f'{chunk.size} != {product(chunks)}')
        return prototype.buffer(chunk.reshape(-1).view('B'))

    async def exists(self, key: str) -> bool:
        """Return whether key exists in store."""
        # print(f'exists({key=})')
        if key in self._store:
            return True
        assert isinstance(key, str)
        try:
            _, page, _, offset, bytecount = self._parse_key(key)
        except (KeyError, IndexError):
            return False
        if self._chunkmode and offset is None:
            return True
        return (
            page is not None
            and offset is not None
            and bytecount is not None
            and offset > 0
            and bytecount > 0
        )

    async def set(self, key: str, value: Buffer) -> None:
        """Store (key, value) pair."""
        if self._read_only:
            raise PermissionError('ZarrTiffStore is read-only')
        if (
            key in self._store
            or key == 'zarr.json'
            or key[-10:] == '.zmetadata'
            or key[-7:] == '.zarray'
            or key[-7:] == '.zgroup'
        ):
            # catch '.zarray' and 'attribute/.zarray'
            return None
        keyframe, page, chunkindex, offset, bytecount = self._parse_key(key)
        if (
            page is None
            or offset is None
            or offset == 0
            or bytecount is None
            or bytecount == 0
        ):
            return
        data = value.to_bytes()
        if bytecount < len(data):
            data = data[:bytecount]
        self._filecache.write(page.parent.filehandle, offset, data)

    def _parse_key(self, key: str, /) -> tuple[
        TiffPage,
        TiffPage | TiffFrame | None,
        int | None,
        int | None,
        int | None,
    ]:
        """Return keyframe, page, index, offset, and bytecount from key.

        Raise KeyError if key is not valid.
""" if self._multiscales: try: level, key = key.split('/') series = self._data[int(level)] except (ValueError, IndexError) as exc: raise KeyError(key) from exc else: series = self._data[0] keyframe = series.keyframe pageindex, chunkindex = self._indices(key, series) if series.dataoffset is not None: # contiguous or truncated page = series[0] if page is None or page.dtype is None or page.keyframe is None: return keyframe, None, chunkindex, 0, 0 offset = pageindex * page.size * page.dtype.itemsize try: offset += page.dataoffsets[chunkindex] except IndexError as exc: raise KeyError(key) from exc if self._chunkmode: bytecount = page.size * page.dtype.itemsize return page.keyframe, page, chunkindex, offset, bytecount elif self._chunkmode: with self._filecache.lock: page = series[pageindex] if page is None or page.keyframe is None: return keyframe, None, None, 0, 0 return page.keyframe, page, None, None, None else: with self._filecache.lock: page = series[pageindex] if page is None or page.keyframe is None: return keyframe, None, chunkindex, 0, 0 try: offset = page.dataoffsets[chunkindex] except IndexError: # raise KeyError(key) from exc # issue #249: Philips may be missing last row of tiles return page.keyframe, page, chunkindex, 0, 0 try: bytecount = page.databytecounts[chunkindex] except IndexError as exc: raise KeyError(key) from exc return page.keyframe, page, chunkindex, offset, bytecount def _indices(self, key: str, series: TiffPageSeries, /) -> tuple[int, int]: """Return page and strile indices from Zarr chunk index.""" keyframe = series.keyframe shape = series.get_shape(self._squeeze) try: indices = [int(i) for i in key.split('.')] except ValueError as exc: raise KeyError(key) from exc assert len(indices) == len(shape) if self._chunkmode: chunked = (1,) * len(keyframe.shape) else: chunked = keyframe.chunked p = 1 for i, s in enumerate(shape[::-1]): p *= s if p == keyframe.size: i = len(indices) - i - 1 frames_indices = indices[:i] strile_indices = indices[i:] frames_chunked = shape[:i] strile_chunked = list(shape[i:]) # updated later break else: raise RuntimeError if len(strile_chunked) == len(keyframe.shape): strile_chunked = list(chunked) else: # get strile_chunked including singleton dimensions i = len(strile_indices) - 1 j = len(keyframe.shape) - 1 while True: if strile_chunked[i] == keyframe.shape[j]: strile_chunked[i] = chunked[j] i -= 1 j -= 1 elif strile_chunked[i] == 1: i -= 1 else: raise RuntimeError('shape does not match page shape') if i < 0 or j < 0: break assert product(strile_chunked) == product(chunked) if len(frames_indices) > 0: frameindex = int( numpy.ravel_multi_index(frames_indices, frames_chunked) ) else: frameindex = 0 if len(strile_indices) > 0: strileindex = int( numpy.ravel_multi_index(strile_indices, strile_chunked) ) else: strileindex = 0 return frameindex, strileindex class ZarrFileSequenceStore(ZarrStore): """Zarr 3 store interface to image array in FileSequence. Parameters: filesequence: FileSequence instance to wrap as Zarr store. Files in containers are not supported. fillvalue: Value to use for missing chunks. The default is 0. chunkmode: Currently only one chunk per file is supported. chunkshape: Shape of chunk in each file. Must match ``FileSequence.imread(file, **imreadargs).shape``. chunkdtype: Data type of chunk in each file. Must match ``FileSequence.imread(file, **imreadargs).dtype``. axestiled: Axes to be tiled. Map stacked sequence axis to chunk axis. zattrs: Additional attributes to store in `.zattrs`. 
        ioworkers:
            If not 1, asynchronously run `imread` function in separate
            thread. If enabled, internal threading for the `imread`
            function should be disabled.
        read_only:
            Passed to :py:class:`zarr.abc.store.Store`.
        imreadargs:
            Arguments passed to :py:attr:`FileSequence.imread`.
        **kwargs:
            Arguments passed to :py:attr:`FileSequence.imread` in addition
            to `imreadargs`.

    Notes:
        If `chunkshape` or `chunkdtype` are *None* (default), their values
        are determined by reading the first file with
        ``FileSequence.imread(arg.files[0], **imreadargs)``.

    """

    imread: Callable[..., NDArray[Any]]
    """Function to read image array from single file."""

    _lookup: dict[tuple[int, ...], str]
    _chunks: tuple[int, ...]
    _dtype: numpy.dtype[Any]
    _tiled: TiledSequence
    _commonpath: str
    _ioworkers: int
    _kwargs: dict[str, Any]

    def __init__(
        self,
        filesequence: FileSequence,
        /,
        *,
        fillvalue: int | float | None = None,
        chunkmode: CHUNKMODE | int | str | None = None,
        chunkshape: Sequence[int] | None = None,
        chunkdtype: DTypeLike | None = None,
        axestiled: dict[int, int] | Sequence[tuple[int, int]] | None = None,
        zattrs: dict[str, Any] | None = None,
        ioworkers: int | None = 1,
        imreadargs: dict[str, Any] | None = None,
        read_only: bool = True,
        **kwargs: Any,
    ) -> None:
        super().__init__(
            fillvalue=fillvalue, chunkmode=chunkmode, read_only=read_only
        )
        if self._chunkmode not in {0, 3}:
            raise ValueError(f'invalid chunkmode {self._chunkmode!r}')
        if not isinstance(filesequence, FileSequence):
            raise TypeError('not a FileSequence')
        if filesequence._container:
            raise NotImplementedError('cannot open container as Zarr store')

        # TODO: deprecate kwargs?
        if imreadargs is not None:
            kwargs |= imreadargs

        self._ioworkers = 1 if ioworkers is None else ioworkers
        self._kwargs = kwargs
        self._imread = filesequence.imread
        self._commonpath = filesequence.commonpath()

        if chunkshape is None or chunkdtype is None:
            chunk = filesequence.imread(filesequence[0], **kwargs)
            self._chunks = chunk.shape
            self._dtype = chunk.dtype
        else:
            self._chunks = tuple(chunkshape)
            self._dtype = numpy.dtype(chunkdtype)
            chunk = None

        self._tiled = TiledSequence(
            filesequence.shape, self._chunks, axestiled=axestiled
        )
        self._lookup = dict(
            zip(self._tiled.indices(filesequence.indices), filesequence)
        )

        zattrs = {} if zattrs is None else dict(zattrs)
        # TODO: add _ARRAY_DIMENSIONS to ZarrFileSequenceStore
        # if '_ARRAY_DIMENSIONS' not in zattrs:
        #     zattrs['_ARRAY_DIMENSIONS'] = list(...)

        self._store['.zattrs'] = _json_dumps(zattrs)
        self._store['.zarray'] = _json_dumps(
            {
                'zarr_format': 2,
                'shape': self._tiled.shape,
                'chunks': self._tiled.chunks,
                'dtype': _dtype_str(self._dtype),
                'compressor': None,
                'fill_value': _json_value(fillvalue, self._dtype),
                'order': 'C',
                'filters': None,
            }
        )

    async def exists(self, key: str) -> bool:
        """Return whether key exists in store."""
        # print(f'exists({key=})')
        if key in self._store:
            return True
        assert isinstance(key, str)
        try:
            indices = tuple(int(i) for i in key.split('.'))
        except Exception:
            return False
        return indices in self._lookup

    async def get(
        self,
        key: str,
        prototype: BufferPrototype,
        byte_range: ByteRequest | None = None,
    ) -> Buffer | None:
        """Return value associated with key."""
        if byte_range is not None:
            raise NotImplementedError(f'{byte_range=!r} not supported')
        if key in self._store:
            return prototype.buffer.from_bytes(self._store[key])
        if (
            key == 'zarr.json'
            or key[-10:] == '.zmetadata'
            or key[-7:] == '.zarray'
            or key[-7:] == '.zgroup'
        ):
            # catch '.zarray' and 'attribute/.zarray'
            return None
        indices = tuple(int(i) for i in key.split('.'))
        filename = self._lookup.get(indices, None)
        if filename is None:
            return None
        if self._ioworkers != 1:
            chunk = await asyncio.to_thread(
                self._imread, filename, **self._kwargs
            )
        else:
            chunk = self._imread(filename, **self._kwargs)
        return prototype.buffer(chunk.reshape(-1).view('B'))

    def write_fsspec(
        self,
        jsonfile: str | os.PathLike[Any] | TextIO,
        /,
        url: str | None,
        *,
        quote: bool | None = None,
        groupname: str | None = None,
        templatename: str | None = None,
        codec_id: str | None = None,
        version: int | None = None,
        _append: bool = False,
        _close: bool = True,
    ) -> None:
        """Write fsspec ReferenceFileSystem as JSON to file.

        Parameters:
            jsonfile:
                Name or open file handle of output JSON file.
            url:
                Remote location of TIFF file(s) without file name(s).
            quote:
                Quote file names, that is, replace ' ' with '%20'.
                The default is True.
            groupname:
                Zarr group name.
            templatename:
                Version 1 URL template name. The default is 'u'.
            codec_id:
                Name of Numcodecs codec to decode files or chunks.
            version:
                Version of fsspec file to write. The default is 0.
            _append, _close:
                Experimental API.

        References:
            - `fsspec ReferenceFileSystem format`_

        """
        from urllib.parse import quote as quote_

        kwargs = self._kwargs.copy()
        if codec_id is not None:
            pass
        elif self._imread is imread:
            codec_id = 'tifffile'
        elif 'imagecodecs' in self._imread.__module__:
            if (
                self._imread.__name__ != 'imread'
                or 'codec' not in self._kwargs
            ):
                raise ValueError('cannot determine codec_id')
            codec = kwargs.pop('codec')
            if isinstance(codec, (list, tuple)):
                codec = codec[0]
            if callable(codec):
                codec = codec.__name__.split('_')[0]
            codec_id = {
                'apng': 'imagecodecs_apng',
                'avif': 'imagecodecs_avif',
                'gif': 'imagecodecs_gif',
                'heif': 'imagecodecs_heif',
                'jpeg': 'imagecodecs_jpeg',
                'jpeg8': 'imagecodecs_jpeg',
                'jpeg12': 'imagecodecs_jpeg',
                'jpeg2k': 'imagecodecs_jpeg2k',
                'jpegls': 'imagecodecs_jpegls',
                'jpegxl': 'imagecodecs_jpegxl',
                'jpegxr': 'imagecodecs_jpegxr',
                'ljpeg': 'imagecodecs_ljpeg',
                'lerc': 'imagecodecs_lerc',
                # 'npy': 'imagecodecs_npy',
                'png': 'imagecodecs_png',
                'qoi': 'imagecodecs_qoi',
                'tiff': 'imagecodecs_tiff',
                'webp': 'imagecodecs_webp',
                'zfp': 'imagecodecs_zfp',
            }[codec]
        else:
            # TODO: choose codec from filename
            raise ValueError('cannot determine codec_id')

        if url is None:
            url = ''
        elif url and url[-1] != '/':
            url += '/'

        if groupname is None:
            groupname = ''
        elif groupname and groupname[-1] != '/':
            groupname += '/'

        refs: dict[str, Any] = {}
        if version == 1:
            if _append:
                raise ValueError('cannot append to version 1 files')
            if templatename is None:
                templatename = 'u'
            refs['version'] = 1
            refs['templates'] = {templatename: url}
            refs['gen'] = []
            refs['refs'] = refzarr = {}
            url = f'{{{{{templatename}}}}}'
        else:
            refzarr = refs

        if groupname and not _append:
            refzarr['.zgroup'] = _json_dumps({'zarr_format': 2}).decode()

        for key, value in self._store.items():
            if '.zarray' in key:
                value = json.loads(value)
                # TODO: make kwargs serializable
                value['compressor'] = {'id': codec_id, **kwargs}
                value = _json_dumps(value)
            refzarr[groupname + key] = value.decode()

        fh: TextIO
        if hasattr(jsonfile, 'write'):
            fh = jsonfile  # type: ignore[assignment]
        else:
            fh = open(jsonfile, 'w', encoding='utf-8')

        if version == 1:
            fh.write(json.dumps(refs, indent=1).rsplit('}"', 1)[0] + '}"')
            indent = '  '
        elif _append:
            fh.write(',\n')
            fh.write(json.dumps(refs, indent=1)[2:-2])
            indent = ' '
        else:
            fh.write(json.dumps(refs, indent=1)[:-2])
            indent = ' '

        prefix = len(self._commonpath)
        for key, value in self._store.items():
            if '.zarray' in key:
                value = json.loads(value)
                for index, filename in sorted(
                    self._lookup.items(), key=lambda x: x[0]
                ):
                    filename = filename[prefix:].replace('\\', '/')
                    if quote is None or quote:
                        filename = quote_(filename)
                    if filename[0] == '/':
                        filename = filename[1:]
                    indexstr = '.'.join(str(i) for i in index)
                    fh.write(
                        f',\n{indent}"{groupname}{indexstr}": '
                        f'["{url}{filename}"]'
                    )

        if version == 1:
            fh.write('\n }\n}')
        elif _close:
            fh.write('\n}')
        if not hasattr(jsonfile, 'write'):
            fh.close()


def zarr_selection(
    store: ZarrStore,
    selection: BasicSelection,
    /,
    *,
    groupindex: str | None = None,
    close: bool = True,
    out: OutputType = None,
) -> NDArray[Any]:
    """Return selection from Zarr store.

    Parameters:
        store:
            ZarrStore instance to read selection from.
        selection:
            Subset of image to be extracted and returned.
            Refer to the Zarr documentation for valid selections.
        groupindex:
            Index of array if store is Zarr group.
        close:
            Close store before returning.
        out:
            Specifies how image array is returned.
            By default, create a new array.
            If a *numpy.ndarray*, a writable array to which the images
            are copied.
            If *'memmap'*, create a memory-mapped array in a temporary
            file.
            If a *string* or *open file*, the file used to create a
            memory-mapped array.

    """
    import zarr
    from zarr.core.indexing import BasicIndexer

    zarray: zarr.Array
    z = zarr.open(store, mode='r', zarr_format=2)
    try:
        if isinstance(z, zarr.Group):
            if groupindex is None:
                groupindex = '0'
            zarray = z[groupindex]  # type: ignore[assignment]
        else:
            zarray = z
        if out is not None:
            shape = BasicIndexer(
                selection,
                shape=zarray.shape,
                chunk_grid=RegularChunkGrid(chunk_shape=zarray.chunks),
            ).shape
            ndbuffer = NDBuffer.from_numpy_array(
                create_output(out, shape, zarray.dtype)
            )
        else:
            ndbuffer = None
        result = zarray.get_basic_selection(selection, out=ndbuffer)
        del zarray
    finally:
        if close:
            store.close()
    return result  # type: ignore[return-value]


def _empty_chunk(
    shape: tuple[int, ...],
    dtype: DTypeLike,
    fillvalue: int | float | None,
    /,
) -> NDArray[Any]:
    """Return empty chunk."""
    if fillvalue is None or fillvalue == 0:
        # return bytes(product(shape) * dtype.itemsize)
        return numpy.zeros(shape, dtype)
    chunk = numpy.empty(shape, dtype)
    chunk[:] = fillvalue
    return chunk  # .tobytes()


def _dtype_str(dtype: numpy.dtype[Any], /) -> str:
    """Return dtype as string with native byte order."""
    if dtype.itemsize == 1:
        byteorder = '|'
    else:
        byteorder = {'big': '>', 'little': '<'}[sys.byteorder]
    return byteorder + dtype.str[1:]


def _json_dumps(obj: Any, /) -> bytes:
    """Serialize object to JSON formatted string."""
    return json.dumps(
        obj,
        indent=1,
        sort_keys=True,
        ensure_ascii=True,
        separators=(',', ': '),
    ).encode('ascii')


def _json_value(value: Any, dtype: numpy.dtype[Any], /) -> Any:
    """Return value which is serializable to JSON."""
    if value is None:
        return value
    if dtype.kind == 'b':
        return bool(value)
    if dtype.kind in 'ui':
        return int(value)
    if dtype.kind == 'f':
        if numpy.isnan(value):
            return 'NaN'
        if numpy.isposinf(value):
            return 'Infinity'
        if numpy.isneginf(value):
            return '-Infinity'
        return float(value)
    if dtype.kind == 'c':
        value = numpy.array(value, dtype)
        return (
            _json_value(value.real, dtype.type().real.dtype),
            _json_value(value.imag, dtype.type().imag.dtype),
        )
    return value


def _ndindex(
    shape: tuple[int, ...], chunks: tuple[int, ...], /
) -> Iterator[str]:
    """Return iterator over all chunk index strings."""
    assert len(shape) == len(chunks)
    chunked = tuple(
        i // j + (1 if i % j else 0) for i, j in zip(shape, chunks)
    )
    for indices in numpy.ndindex(chunked):
        yield '.'.join(str(index) for index in indices)


def _is_writable(keyframe: TiffPage) -> bool:
    """Return True if chunks are writable."""
    return (
        keyframe.compression == 1
        and keyframe.fillorder == 1
        and keyframe.sampleformat in {1, 2, 3, 6}
        and keyframe.bitspersample in {8, 16, 32, 64, 128}
        # and (
        #     keyframe.rowsperstrip == 0
        #     or keyframe.imagelength % keyframe.rowsperstrip == 0
        # )
    )


def _chunks(
    chunks: tuple[int, ...],
    shape: tuple[int, ...],
    shaped: tuple[int, int, int, int, int],
    /,
) -> tuple[int, ...]:
    """Return chunks with same length as shape."""
    ndim = len(shape)
    if ndim == 0:
        return ()  # empty array
    if 0 in shape:
        return (1,) * ndim
    d = 0 if shaped[1] == 1 else 1
    i = min(ndim, 3 + d)
    n = len(chunks)
    if (
        n == 2 + d
        and i != 2 + d
        and shape[-1] == 1
        and shape[-i:] == shaped[-i:]
    ):
        # planarconfig=contig with one sample
        chunks = chunks + (1,)
    if ndim < len(chunks):
        # remove leading dimensions of size 1 from chunks
        for i, size in enumerate(chunks):
            if size > 1:
                break
        chunks = chunks[i:]
        if ndim < len(chunks):
            raise ValueError(f'{shape=!r} is shorter than {chunks=!r}')
    # prepend size 1 dimensions to chunks to match length of shape
    return tuple([1] * (ndim - len(chunks)) + list(chunks))
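

# A minimal usage sketch (kept as comments so the module stays importable):
# wrap a TiffPageSeries in ZarrTiffStore and read a region through
# zarr_selection. The file name 'temp.tif' and the selection are placeholder
# assumptions, not values required by the library.
#
#     from tifffile import TiffFile
#     from tifffile.zarr import ZarrTiffStore, zarr_selection
#
#     with TiffFile('temp.tif') as tif:
#         store = ZarrTiffStore(tif.series[0])
#         # read a 256x256 region of the last two dimensions;
#         # zarr_selection closes the store by default
#         region = zarr_selection(
#             store, (..., slice(0, 256), slice(0, 256))
#         )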