# Source code for miranda.io._input

from __future__ import annotations

import logging.config
import os
from pathlib import Path
from types import GeneratorType

import netCDF4 as nc  # noqa

from miranda.scripting import LOGGING_CONFIG

# Configure the standard `logging` machinery from the LOGGING_CONFIG dictionary
# imported from `miranda.scripting`. This runs as a side effect of importing
# this module.
logging.config.dictConfig(LOGGING_CONFIG)


# Public names exported by this module.
__all__ = [
    "discover_data",
    "find_filepaths",
]


# FIXME: Clarify how `discover_data` and `find_filepaths` differ; both locate paths by globbing.
def discover_data(
    input_files: str | os.PathLike | list[str | os.PathLike] | GeneratorType,
    suffix: str = "nc",
    recurse: bool = True,
) -> list[Path] | GeneratorType:
    """Discover data.

    Parameters
    ----------
    input_files : str, os.PathLike, list of str or os.PathLike, or GeneratorType
        Path or string to a file, a folder, a list of paths, or a generator of paths.
    suffix : str
        File-ending suffix to search for, without the leading dot. Default: "nc".
    recurse : bool
        Whether to recurse through folders or not. Default: True.

    Returns
    -------
    list of pathlib.Path or GeneratorType of pathlib.Path
        - Folder, non-recursive (or a "zarr" suffix): sorted list of matches.
        - Folder, recursive: the lazy iterator returned by ``Path.rglob``.
        - Single existing file: a one-element list.
        - List input: the entries converted to ``Path`` and sorted.
        - Generator input: the same generator object, untouched.
        - A path that is neither an existing folder nor an existing file is
          returned as a bare ``Path``.

    Raises
    ------
    NotImplementedError
        If `input_files` is not a string, path-like, list, or generator.

    Warnings
    --------
    Recursion through ".zarr" files is explicitly disabled.
    Recursive globs and generators will not be expanded/sorted.
    """
    if isinstance(input_files, (str, os.PathLike)):
        location = Path(input_files)

        if location.is_dir():
            pattern = f"*.{suffix}"
            # Zarr stores are themselves directories: never descend into them.
            if suffix.endswith("zarr") or not recurse:
                return sorted(location.glob(pattern))
            return location.rglob(pattern)

        # Test the original path, not the glob result: lists and iterators
        # have no ``is_file`` method.
        if location.is_file():
            logging.warning(
                "Data discovery yielded a single file. Casting to `list[Path]`."
            )
            return [location]

        # Neither an existing folder nor an existing file: hand the path back.
        return location

    if isinstance(input_files, list):
        return sorted(Path(p) for p in input_files)

    if isinstance(input_files, GeneratorType):
        logging.warning(
            "A Generator was passed to `discover_data`. Passing object along..."
        )
        return input_files

    raise NotImplementedError(f"input_files: {type(input_files)}")
def find_filepaths(
    source: str | Path | GeneratorType | list[Path | str],
    recursive: bool = True,
    file_suffixes: str | list[str] | None = None,
    **_,
) -> list[Path]:
    """Find all available filepaths at a given source.

    Parameters
    ----------
    source : str, Path, GeneratorType, or list[str or Path]
        One location, or an iterable of locations, to search. A leading ``~``
        in each location is expanded. A generator is consumed by this call.
    recursive : bool
        If truthy, search each location with ``Path.rglob``; otherwise only
        its direct children with ``Path.glob``. Default: True.
    file_suffixes : str or list of str, optional
        Glob pattern(s) to match. A pattern without a ``*`` is wrapped as
        ``*<pattern>*``. Default: ``["*", ".*"]``.
    **_
        Extra keyword arguments are accepted and ignored.

    Returns
    -------
    list of pathlib.Path
        Every matching path (directories included, since glob does not filter
        by entry type), each listed once, in first-seen order.
    """
    if file_suffixes is None:
        patterns = ["*", ".*"]
    elif isinstance(file_suffixes, str):
        patterns = [file_suffixes]
    else:
        patterns = list(file_suffixes)

    # Bare fragments such as "nc" become the wildcard pattern "*nc*".
    patterns = [p if "*" in p else f"*{p}*" for p in patterns]

    locations = [source] if isinstance(source, (Path, str)) else source

    # A dict serves as an insertion-ordered set. Patterns can overlap (pathlib's
    # "*" already matches dot-files, so the default "*" and ".*" both hit hidden
    # entries), and each path must be reported only once.
    found: dict[Path, None] = {}
    for location in locations:
        base = Path(location).expanduser()
        search = base.rglob if recursive else base.glob
        for pattern in patterns:
            found.update(dict.fromkeys(search(pattern)))

    return list(found)