Source code for miranda.remote.remove

"""Remote File Removal Operations module."""
from __future__ import annotations

import logging.config
import warnings
from datetime import date
from getpass import getpass
from pathlib import Path
from types import GeneratorType

from miranda.io.utils import creation_date
from miranda.scripting import LOGGING_CONFIG
from miranda.storage import report_file_size

logging.config.dictConfig(LOGGING_CONFIG)

try:
    import fabric  # noqa
except ImportError:
    warnings.warn(
        f"{__name__} functions require additional dependencies. "
        "Please install them with `pip install miranda[remote]`."
    )


__all__ = [
    "delete_by_date",
    "delete_by_variable",
    "delete_duplicates",
    "file_emptier",
]


[docs] def file_emptier(*, file_list: list[str | Path] | GeneratorType) -> None: """Open and overwrite a list of file paths in order to delete data while preserving the file name. Parameters ---------- file_list : list of str or Path, or GeneratorType List of files to be overwritten Returns ------- None """ file_list = sorted([Path(f) for f in file_list]) logging.info( f"Found {len(file_list)} files totalling {report_file_size(file_list)}." ) for file in file_list: logging.warning(f"Overwriting {file}") open(file, "w").close()
[docs] def delete_by_date( *, source: str | Path, year: int | None = None, month: int | None = None, day: int | None = None, pattern: str | None = None, server: str | Path | None = None, user: str | None = None, password: str | None = None, date_object: date | None = None, ) -> None: """Remove a selection of files based on a given date of last modification. Parameters ---------- source: str or Path year: int, optional month: int, optional day: int, optional pattern: str, optional server: str or Path, optional user: str, optional password: str, optional date_object: date, optional Returns ------- None """ user = user or input("Username:") password = password or getpass("Password:") if year and month and day: date_selected = date(year, month, day) elif date_object: date_selected = date_object else: raise ValueError glob_pattern = pattern or "*.nc" nc_files = Path(source).rglob(glob_pattern) nc_files = list(nc_files) nc_files.sort() logging.info(f"Found {len(nc_files)} files totalling {report_file_size(nc_files)}.") context = None if server: connection = fabric.Connection( host=server, user=user, connect_kwargs=dict(password=password) ) context = connection.open() freed_space = 0 deleted_files = 0 for file in nc_files: if creation_date(file) == date_selected: freed_space += Path(file).stat().st_size logging.info(f"Deleting {file.name}") if context: context.remove(file) else: file.unlink() deleted_files += 1 logging.info( f"Removed {deleted_files} files totalling {report_file_size(freed_space)}" ) if server: context.close() return
[docs] def delete_duplicates( *, source: str | Path, target: str | Path, server: str | Path | None = None, user: str = None, password: str = None, pattern: str = None, delete_target_duplicates: bool = False, ) -> None: """ Parameters ---------- source : str or Path target : str or Path server : str or Path, optional user: str password : str pattern: str delete_target_duplicates : bool Returns ------- None """ user = user or input("Username:") password = password or getpass("Password:") glob_pattern = pattern or "*.nc" connection = fabric.Connection( host=server, user=user, connect_kwargs=dict(password=password) ) nc_files_source = Path(source).rglob(glob_pattern) nc_files_source = {f.stem for f in nc_files_source} nc_files_target = Path(target).rglob(glob_pattern) nc_file_duplicates = [] for f in nc_files_target: if f.name in nc_files_source: logging.info(f"Duplicate found: {f.name}") nc_file_duplicates.append(f) nc_file_duplicates.sort() logging.info( f"Found {len(nc_file_duplicates)} files totalling {report_file_size(nc_file_duplicates)}" ) freed_space = 0 deleted_files = 0 if delete_target_duplicates: with connection as context: for dup in nc_file_duplicates: freed_space += Path(dup).stat().st_size logging.info(f"Deleting {dup.name}") context.remove(dup) deleted_files += 1 logging.info( f"Removed { deleted_files} files totalling {report_file_size(freed_space)}." ) return
[docs] def delete_by_variable( *, target: str | Path | list[str | Path] | GeneratorType = None, variables: list[str], server: str | Path | None = None, user: str | None = None, password: str | None = None, file_suffix: str | None = None, delete: bool = False, ) -> None: """Delete according to variable name. Given target location(s), a list of variables and a server address, perform a glob search and delete file names starting with the variables identified Parameters ---------- target : str, Path, list of str or Path, or GeneratorType] variables : list of str server : str or Path, optional user : str, optional password : str, optional file_suffix : str, optional delete : bool Returns ------- None """ user = user or input("Username:") password = password or getpass("Password:") connection = fabric.Connection( host=server, user=user, connect_kwargs=dict(password=password) ) freed_space = 0 deleted_files = 0 for var in variables: glob_suffix = file_suffix or ".nc" if isinstance(target, (GeneratorType, list)): found = list() for location in target: found.extend([f for f in Path(location).rglob(f"{var}*{glob_suffix}")]) else: found = Path(target).rglob(f"{var}*{glob_suffix}") nc_files = [Path(f) for f in found] nc_files.sort() logging.info( f"Found {len(nc_files)} files totalling {report_file_size(nc_files)}" ) with connection as context: for file in nc_files: freed_space += Path(file).stat().st_size deleted_files += 1 if delete: logging.info(f"Deleting file {file.stem}") context.remove(file) logging.info( f"Removed {deleted_files} files totalling {report_file_size(freed_space)}" ) return