Source code for miranda.io._input
from __future__ import annotations
import logging
import pathlib
from types import GeneratorType
logger = logging.getLogger("miranda.io.input")
__all__ = [
"discover_data",
]
# FIXME: How are these two functions different?
[docs]
def discover_data(
input_files: str | pathlib.Path | list[str | pathlib.Path] | GeneratorType,
suffix: str = "nc",
recurse: bool = True,
) -> list[pathlib.Path] | GeneratorType:
"""
Discover data.
Parameters
----------
input_files : str, pathlib.Path, list of str or Path, or GeneratorType
Path or string to a file, a folder, or a generator of paths.
suffix : str
File-ending suffix to search for. Default: "nc".
recurse : bool
Whether to recurse through folders or not. Default: True.
Returns
-------
list of pathlib.Path or GeneratorType of pathlib.Path
Warnings
--------
Recursion through ".zarr" files is explicitly disabled. Recursive globs and generators will not be expanded/sorted.
"""
if isinstance(input_files, (pathlib.Path, str)):
input_files = pathlib.Path(input_files)
if input_files.is_dir():
if suffix.endswith("zarr") or not recurse:
input_files = sorted(list(input_files.glob(f"*.{suffix}")))
else:
input_files = input_files.rglob(f"*.{suffix}")
elif input_files.is_file():
logger.warning("Data discovery yielded a single file. Casting to `list[Path]`.")
input_files = [input_files]
elif isinstance(input_files, list):
input_files = sorted(pathlib.Path(p) for p in input_files)
elif isinstance(input_files, GeneratorType):
logger.warning("A Generator was passed to `discover_data`. Passing object along...")
pass
else:
raise NotImplementedError(f"input_files: {type(input_files)}")
return input_files