# Source code for miranda.eccc._support_rvt

from __future__ import annotations

import datetime
import json
import re
import urllib
import urllib.parse
import urllib.request
from pathlib import Path

import pandas as pd

"https://api.weather.gc.ca/collections/climate-daily/items?datetime=1840-03-01%2000:00:00/2021-06-02%2000:00:00&STN_ID=10761&f=json&limit=1500000&startindex=0"

# TODO: Investigate the API definition: https://api.weather.gc.ca/collections/climate-hourly


# FIXME: This function is a WIP - requires work.
def _format_eccc_datetime(
    value: datetime.datetime | str | None, fallback: datetime.datetime
) -> str:
    """Render one bound of the request interval as ``YYYY-MM-DD HH:MM:SS`` text."""
    if not value:
        value = fallback
    # date/datetime objects are formatted directly; running the regex on them
    # would raise TypeError.
    if isinstance(value, datetime.date):
        return value.strftime("%Y-%m-%d %H:%M:%S")
    text = str(value)
    # Date-only strings are expanded to midnight; any other string is passed
    # through untouched so callers can supply their own time component.
    if re.match(r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])$", text):
        return datetime.datetime.fromisoformat(text).strftime("%Y-%m-%d %H:%M:%S")
    return text


def _build_eccc_request_url(
    timestep: str,
    start_date: datetime.datetime | str | None = None,
    end_date: datetime.datetime | str | None = None,
    climate_id: str | None = None,
) -> str:
    """Assemble the ``climate-<timestep>/items`` query URL for the given filters."""
    frequency = timestep.lower()
    if frequency not in ("hourly", "daily"):
        raise ValueError(timestep)
    # Use the normalized spelling so that e.g. "Hourly" yields a valid collection name.
    base_url = f"https://api.weather.gc.ca/collections/climate-{frequency}/"

    earliest = datetime.datetime(year=1840, month=1, day=1)
    # An unset end date means "up to now"; defaulting both bounds to 1840-01-01
    # would request an empty interval.
    # NOTE(review): naive local time is used here - confirm the timezone the API expects.
    latest = datetime.datetime.now()
    date_range = "/".join(
        [
            _format_eccc_datetime(start_date, earliest),
            _format_eccc_datetime(end_date, latest),
        ]
    )

    facets = {"f": "json", "datetime": date_range}
    # Only filter on the station when one was requested; otherwise the literal
    # text "None" would be sent as the identifier.
    if climate_id is not None:
        facets["CLIMATE_IDENTIFIER"] = climate_id
    # PROVINCE_CODE and STN_ID are further facets that are not wired in yet.
    facets["limit"] = 20000
    facets["startindex"] = 0

    # Spaces become %20 while ":" and "/" stay literal inside the datetime interval.
    query = urllib.parse.urlencode(facets, quote_via=urllib.parse.quote, safe="/:")
    return urllib.parse.urljoin(base_url, f"items?{query}")


def gather_eccc_stations(
    timestep: str,
    start_date: datetime.datetime | str | None = None,
    end_date: datetime.datetime | str | None = None,
    climate_id: str | None = None,
) -> pd.DataFrame:
    """Collect ECCC station data from the Environment and Climate Change Canada API.

    A single request is made with ``limit=20000`` and ``startindex=0``; no
    pagination is performed.

    Parameters
    ----------
    timestep : str
        Either "hourly" or "daily" (case-insensitive); selects the
        ``climate-hourly`` or ``climate-daily`` collection.
    start_date : datetime.datetime or str, optional
        Start of the requested interval. Strings of the form ``YYYY-MM-DD`` are
        expanded to midnight of that day; other strings are sent as given.
        Defaults to 1840-01-01 00:00:00.
    end_date : datetime.datetime or str, optional
        End of the requested interval, interpreted like `start_date`.
        Defaults to the current time.
    climate_id : str, optional
        Value for the ``CLIMATE_IDENTIFIER`` filter. When omitted, no station
        filter is sent.

    Returns
    -------
    pandas.DataFrame
        One row per feature of the JSON response: the feature's ``properties``
        as columns plus a ``geometry`` column holding the raw geometry mapping.

    Raises
    ------
    ValueError
        If `timestep` is neither "hourly" nor "daily".
    """
    request_url = _build_eccc_request_url(
        timestep, start_date=start_date, end_date=end_date, climate_id=climate_id
    )

    # pandas has no ``read_file`` (that is a geopandas function), so the JSON
    # payload is parsed with the standard library and flattened by hand.
    with urllib.request.urlopen(request_url) as response:  # noqa: S310
        payload = json.load(response)

    records = [
        {**(feature.get("properties") or {}), "geometry": feature.get("geometry")}
        for feature in payload.get("features", [])
    ]
    return pd.DataFrame(records)
if __name__ == "__main__":
    # Make sure a "downloaded" directory exists under the current working directory.
    target_folder = Path.cwd() / "downloaded"
    target_folder.mkdir(exist_ok=True)

    # Example run: two years of hourly records for a single climate identifier.
    # The result is only held in `data`; nothing in this block writes it to
    # `target_folder`.
    data = gather_eccc_stations(
        climate_id="7040815",
        end_date="2020-12-31",
        start_date="2019-01-01",
        timestep="hourly",
    )