# weather module
Module to prepare weather data
## ClimateConfig
Parses and holds configuration for climate variables.
Source code in cropengine/weather.py
class ClimateConfig:
    """Parses and holds configuration for climate variables.

    The configuration is read from ``config_dict["variables"]``, a mapping of
    variable name -> info dict. For every variable the following is recorded:

    * ``var_to_units[var]``      -- ``(native_unit, target_unit)`` tuple
      (either entry is ``None`` when the key is absent).
    * ``var_to_conversion[var]`` -- value of the ``conversion`` key or ``None``.
    * ``var_to_bands[var]``      -- non-``None`` values of every key that
      starts with ``"band"``, in the order they appear in the info dict.
    * ``derived``                -- set of variable names whose ``derived``
      flag is truthy.

    ``all_bands`` accumulates every band of every variable and may therefore
    contain duplicates; use :meth:`get_all_bands` for the unique list.

    Args:
        config_dict (dict): Source configuration; must contain a
            ``"variables"`` key.

    Raises:
        KeyError: If ``config_dict`` has no ``"variables"`` entry.
    """

    def __init__(self, config_dict):
        self.raw = config_dict
        self.variables = config_dict["variables"]
        self.all_bands = []
        self.var_to_bands = {}
        self.var_to_units = {}
        self.var_to_conversion = {}
        self.derived = set()
        self._parse_variables()

    def _parse_variables(self):
        """Populate the lookup tables from ``self.variables``."""
        for var_name, info in self.variables.items():
            self.var_to_units[var_name] = (
                info.get("native_unit"),
                info.get("target_unit"),
            )
            self.var_to_conversion[var_name] = info.get("conversion")

            if info.get("derived", False):
                self.derived.add(var_name)

            # Any key beginning with "band" names an input band of this
            # variable; None values mark unused band slots and are skipped.
            bands = [
                value
                for key, value in info.items()
                if key.startswith("band") and value is not None
            ]
            self.all_bands.extend(bands)
            self.var_to_bands[var_name] = bands

    def get_all_bands(self):
        """Return the unique band names across all variables.

        Duplicates are removed while keeping first-seen order, so the result
        is deterministic across runs (a plain ``set`` would yield an
        arbitrary, hash-dependent order).

        Returns:
            list: Unique band names in the order they were first encountered.
        """
        return list(dict.fromkeys(self.all_bands))

    def is_derived(self, var_name):
        """Return ``True`` if ``var_name`` is flagged as derived in the config."""
        return var_name in self.derived
## GEEWeatherDataProvider
Handles data retrieval, processing, and export of weather data from Google Earth Engine in PCSE format.
IMPORTANT: This class strictly handles POINT data. If a Geometry/Polygon is provided, it extracts data for the CENTROID of that geometry only.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `start_date` | `str` | Start date (YYYY-MM-DD). | required |
| `end_date` | `str` | End date (YYYY-MM-DD). | required |
| `latitude` | `float` | Latitude (if geometry not provided). | `None` |
| `longitude` | `float` | Longitude (if geometry not provided). | `None` |
| `geometry` | `ee.Geometry` | Polygon or geometry object. Will be converted to its Centroid. | `None` |
| `source` | `str` | Key in meteo.yaml (e.g., 'era5_land'). | `'era5_land'` |
| `filepath` | `str` | Default output path. | `None` |
| `ee_project` | `str` | GCloud project ID for GEE initialization. | `None` |
| `**site_kwargs` | | Extra metadata for the Excel header (e.g., Station, Country). | `{}` |
Source code in cropengine/weather.py
class GEEWeatherDataProvider:
    """
    Handles data retrieval, processing, and export of weather data from Google Earth Engine in PCSE format.

    IMPORTANT: This class strictly handles POINT data. If a Geometry/Polygon is provided,
    it extracts data for the CENTROID of that geometry only.

    Args:
        start_date (str): Start date (YYYY-MM-DD).
        end_date (str): End date (YYYY-MM-DD).
        latitude (float, optional): Latitude (if geometry not provided).
        longitude (float, optional): Longitude (if geometry not provided).
        geometry (ee.Geometry, optional): Polygon or geometry object. Will be converted to its Centroid.
        source (str): Key in meteo.yaml (e.g., 'era5_land').
        filepath (str, optional): Default output path.
        ee_project (str, optional): GCloud project ID for GEE initialization.
        **site_kwargs: Extra metadata for the Excel header (e.g., Station, Country).

    Raises:
        RuntimeError: If Earth Engine cannot be initialized.
        ValueError: If neither ``geometry`` nor both ``latitude`` and
            ``longitude`` are given, or if ``source`` is not a key of
            meteo.yaml.
    """

    def __init__(
        self,
        start_date,
        end_date,
        latitude=None,
        longitude=None,
        geometry=None,
        source="era5_land",
        filepath=None,
        ee_project=None,
        **site_kwargs,
    ):
        self._check_gee_initialized(ee_project)

        if geometry:
            # Features and collections carry their geometry; anything else is
            # handed to ee.Geometry for coercion.
            if isinstance(geometry, (ee.Feature, ee.FeatureCollection)):
                region_geom = geometry.geometry()
            else:
                region_geom = ee.Geometry(geometry)

            # Calculate Centroid
            try:
                centroid_obj = region_geom.centroid(maxError=1)
                coords = centroid_obj.coordinates().getInfo()
            except Exception:
                # Fall back to the centroid of the bounding box when the
                # direct centroid computation fails.
                centroid_obj = region_geom.bounds(maxError=1).centroid(maxError=1)
                coords = centroid_obj.coordinates().getInfo()

            # Coordinates are read as [longitude, latitude].
            self.longitude = coords[0]
            self.latitude = coords[1]
        elif latitude is not None and longitude is not None:
            self.latitude = latitude
            self.longitude = longitude
        else:
            raise ValueError(
                "Must provide either 'geometry' OR 'latitude' and 'longitude'."
            )

        self.region = ee.Geometry.Point([self.longitude, self.latitude])
        self.start_date = start_date
        self.end_date = end_date
        self.source = source.lower()
        self.filepath = filepath
        self.site_kwargs = site_kwargs

        # Lazily filled by get_data() / _get_elevation().
        self._cached_df = None
        self._cached_elevation = None

        with pkg_resources.files(configs).joinpath("meteo.yaml").open("r") as f:
            full_config = yaml.safe_load(f)

        # Lookup is case-insensitive (normalized key), but the error message
        # echoes the source exactly as the caller passed it.
        if self.source not in full_config:
            raise ValueError(
                f"Source '{source}' not found. Available: {list(full_config.keys())}"
            )

        self.weather_config = full_config[self.source]
        self.cfg = ClimateConfig(self.weather_config)

    def _check_gee_initialized(self, project=None):
        """
        Checks if GEE is initialized. If not, attempts to initialize.

        Args:
            project (str, optional): Project passed to ``ee.Initialize`` when
                truthy; otherwise ``ee.Initialize()`` is called without it.

        Raises:
            RuntimeError: If initialization fails. The underlying exception is
                attached as ``__cause__``.
        """
        try:
            # Cheap round trip used as a probe: fails if EE is not ready.
            ee.Image(0).getInfo()
        except Exception:
            print("GEE not initialized. Attempting initialization...")
            try:
                # Try initializing with specific project if provided, else default
                if project:
                    ee.Initialize(project=project)
                else:
                    ee.Initialize()
                print("GEE Initialized successfully.")
            except Exception as e:
                raise RuntimeError(
                    f"Failed to initialize Earth Engine: {e}.\n"
                    "Please run 'earthengine authenticate' in your terminal first."
                ) from e

    def _get_elevation(self):
        """
        Return the elevation at the point, cached after the first call.

        Samples property ``"b1"`` of the first image of the DEM collection
        (config key ``dem_source``) at ``self.region`` with ``scale=30`` and
        rounds it to 3 decimals. This is best-effort: a missing value or any
        failure yields ``0.0`` (a warning is printed on failure).

        Returns:
            float: Elevation value, or ``0.0`` when it could not be fetched.
        """
        if self._cached_elevation is not None:
            return self._cached_elevation

        try:
            geom = self.region
            dem_source = self.weather_config.get(
                "dem_source", "projects/sat-io/open-datasets/GLO-30"
            )
            elev = (
                ee.ImageCollection(dem_source)
                .filterBounds(geom)
                .first()
                .sample(geom, scale=30)
                .first()
                .get("b1")
            )
            val = elev.getInfo()
            self._cached_elevation = round(float(val), 3) if val is not None else 0.0
            return self._cached_elevation
        except Exception as e:
            print(f"Warning: Could not fetch elevation ({e}). Defaulting to 0.")
            self._cached_elevation = 0.0
            return 0.0

    def _extract_data(self):
        """
        Extraction of weather data.

        Delegates to ``extract_timeseries_to_point`` for the configured
        collection, all configured bands, and the configured
        ``default_scale`` (5000 when absent).

        Returns:
            Whatever ``extract_timeseries_to_point`` returns; ``get_data``
            uses it as a DataFrame with a ``"time"`` column and one column
            per requested band.
        """
        band_names = self.cfg.get_all_bands()
        scale = self.weather_config.get("default_scale", 5000)
        collection_id = self.weather_config.get("collection")
        ic = ee.ImageCollection(collection_id)

        return extract_timeseries_to_point(
            lat=self.latitude,
            lon=self.longitude,
            image_collection=ic,
            start_date=self.start_date,
            end_date=self.end_date,
            band_names=band_names,
            scale=scale,
        )

    def get_data(self):
        """
        Return the converted weather table, cached after the first call.

        For every configured variable the raw band columns are passed, in
        configured order, to the function registered in ``CONVERSION_FUNCS``
        under the variable's ``conversion`` name. An unregistered or missing
        name falls back to the identity function. The result is rounded to
        3 decimals.

        Returns:
            pandas.DataFrame: A ``"date"`` column plus one column per variable.

        Raises:
            ValueError: If a conversion function rejects the number of bands
                supplied (the original ``TypeError`` is attached as
                ``__cause__``).
        """
        if self._cached_df is not None:
            return self._cached_df

        df_raw = self._extract_data()
        output = pd.DataFrame(index=df_raw.index)
        output["date"] = df_raw["time"]

        for var, bands in self.cfg.var_to_bands.items():
            conversion = self.cfg.var_to_conversion.get(var)
            converter_func = CONVERSION_FUNCS.get(conversion, lambda x: x)
            inputs = [df_raw[b] for b in bands]
            try:
                output[var] = converter_func(*inputs)
            except TypeError as e:
                raise ValueError(
                    f"Error calculating {var}: Function '{conversion}' "
                    f"expected different arguments than provided bands {bands}. "
                    f"Details: {e}"
                ) from e

        self._cached_df = output.round(3)
        return self._cached_df

    def save_weather_excel(self, filepath=None, **override_kwargs):
        """
        Write the weather data to an Excel file with a site header.

        The sheet ("Sheet1") starts with site-characteristics rows, followed
        by a variable-name row, a units row, and then the data rows.

        Args:
            filepath (str, optional): Output path; falls back to the
                ``filepath`` given at construction.
            **override_kwargs: Header metadata that takes precedence over the
                constructor's ``site_kwargs`` and the built-in defaults.

        Raises:
            ValueError: If no output path is available.
        """
        target_path = filepath or self.filepath
        if not target_path:
            raise ValueError("Invalid filepath.")

        df = self.get_data()

        meta_defaults = {
            "Country": "Unknown",
            "Station": "Unknown",
            "Description": self.weather_config.get("description"),
            "Source": self.weather_config.get("collection"),
            "Contact": "Unknown",
            "Missing values": -999,
            "AngstromA": 0.25,
            "AngstromB": 0.50,
            "HasSunshine": False,
        }
        # Later mappings win: defaults < constructor kwargs < call overrides.
        meta = {**meta_defaults, **self.site_kwargs, **override_kwargs}

        excel_rows = [
            ["Site Characteristics"],
            ["Country", meta["Country"]],
            ["Station", meta["Station"]],
            ["Description", meta["Description"]],
            ["Source", meta["Source"]],
            ["Contact", meta["Contact"]],
            ["Missing values", meta["Missing values"]],
            [
                "Longitude",
                "Latitude",
                "Elevation",
                "AngstromA",
                "AngstromB",
                "HasSunshine",
            ],
            [
                self.longitude,
                self.latitude,
                self._get_elevation(),
                meta["AngstromA"],
                meta["AngstromB"],
                str(meta["HasSunshine"]).upper(),
            ],
            ["Observed data"],
        ]

        # Fixed column order; only variables actually present are exported.
        var_order = ["IRRAD", "TMIN", "TMAX", "VAP", "WIND", "RAIN", "SNOWDEPTH"]
        present_vars = [v for v in var_order if v in df.columns]

        header_names = ["DAY"] + present_vars
        excel_rows.append(header_names)

        # Units row: the target unit of each variable, "-" when unknown.
        header_units = ["date"]
        for v in present_vars:
            if v in self.cfg.var_to_units:
                header_units.append(self.cfg.var_to_units[v][1])
            else:
                header_units.append("-")
        excel_rows.append(header_units)

        df_export = df.copy()
        df_export = df_export.fillna(meta["Missing values"])
        df_export = df_export[["date"] + present_vars]

        with pd.ExcelWriter(target_path, engine="openpyxl") as writer:
            pd.DataFrame(excel_rows).to_excel(
                writer, sheet_name="Sheet1", index=False, header=False, startrow=0
            )
            # Data rows start directly below the header rows.
            df_export.to_excel(
                writer,
                sheet_name="Sheet1",
                index=False,
                header=False,
                startrow=len(excel_rows),
            )

        print(f"File saved successfully to {target_path}")