sensitivity module¶
Module for parameter sensitivity
WOFOSTSensitivityAnalyzer
¶
A robust Sensitivity Analysis (SA) tool for WOFOST simulations.
This class integrates SALib with the WOFOST simulation runner to perform global and local sensitivity analyses. It supports parallel execution, spatial aggregation (to filter out weather noise), and multiple SA methods.
Attributes:
| Name | Type | Description |
|---|---|---|
runner |
An instance of the simulation runner (Batch or Single) capable of spawning engine instances. |
|
engines |
dict |
A dictionary mapping location IDs to WOFOST engine instances. |
Source code in cropengine/sensitivity.py
class WOFOSTSensitivityAnalyzer:
"""
A robust Sensitivity Analysis (SA) tool for WOFOST simulations.
This class integrates SALib with the WOFOST simulation runner to perform
global and local sensitivity analyses. It supports parallel execution,
spatial aggregation (to filter out weather noise), and multiple SA methods.
Attributes:
runner: An instance of the simulation runner (Batch or Single) capable of
spawning engine instances.
engines (dict): A dictionary mapping location IDs to WOFOST engine instances.
"""
def __init__(self, runner):
"""
Initializes the analyzer and pre-loads simulation engines.
Args:
runner: A WOFOST simulation runner object. Must implement
`get_batch_rerunners()` or `get_rerunner()`.
"""
# DISABLE PCSE LOGGING
pcse_logger = logging.getLogger("pcse")
pcse_logger.handlers = []
pcse_logger.setLevel(logging.CRITICAL)
self.runner = runner
self.engines = {}
print("[SA] Loading simulation engines...")
if hasattr(runner, "get_batch_rerunners"):
self.engines = runner.get_batch_rerunners()
else:
self.engines = {0: runner.get_rerunner()}
def run_analysis(
self,
problem_dict,
method="sobol",
n_samples=128,
n_workers=4,
target_variable="TWSO",
mode="global",
sample_locations=10,
num_levels=4,
):
"""
Executes the Sensitivity Analysis workflow.
This method generates parameter samples using SALib, runs simulations across
selected locations in parallel, aggregates the results based on the specified
mode, and computes sensitivity indices.
Args:
problem_dict (dict): The standard SALib problem definition dictionary.
Example:
{
'num_vars': 2,
'names': ['TSUM1', 'SPAN'],
'bounds': [[800, 1200], [28, 35]]
}
method (str, optional): The SA method to use. Options:
- 'sobol': Variance-based (Best for interactions, computationally expensive).
- 'morris': Elementary effects (Good for screening many parameters).
- 'fast': Fourier Amplitude Sensitivity Test.
Defaults to 'sobol'.
n_samples (int, optional): The base sample size (N).
**Note:** The actual number of simulations depends on the method:
- Sobol: N * (2D + 2)
- Morris: N * (D + 1)
- FAST: N * D
Where D is the number of parameters. Defaults to 128.
n_workers (int, optional): Number of parallel processes to use. Defaults to 4.
target_variable (str, optional): The output column from the simulation results
to analyze (e.g., 'TWSO' for Total Dry Weight Storage Organs).
Defaults to 'TWSO'.
mode (str, optional): The spatial aggregation strategy.
- 'global': Runs simulations on `sample_locations` for each parameter set,
averages the results to remove weather noise, and returns a single
sensitivity report for the entire region.
- 'local': Performs a full sensitivity analysis for EACH location
individually. Returns a DataFrame containing indices for every location.
Defaults to 'global'.
sample_locations (int, optional): The number of random locations to select
from the batch runner to represent the region. If None, uses all available
locations (warning: this can be very slow). Defaults to 10.
num_levels (int, optional): Number of grid levels (specific to 'morris' method).
Defaults to 4.
Returns:
pd.DataFrame: A DataFrame containing the sensitivity indices.
- For 'sobol': Columns [Parameter, ST, S1, point_id]
- For 'morris': Columns [Parameter, mu_star, sigma, point_id]
- For 'fast': Columns [Parameter, ST, S1, point_id]
Raises:
ValueError: If an unknown method is specified.
"""
# 1. GENERATE SAMPLES (Method Specific)
if method == "sobol":
param_values = saltelli.sample(
problem_dict, n_samples, calc_second_order=False
)
elif method == "morris":
param_values = morris_sampler.sample(
problem_dict, n_samples, num_levels=num_levels
)
elif method == "fast":
param_values = fast_sampler.sample(problem_dict, n_samples)
else:
raise ValueError(
f"Unknown method: {method}. Available methods are: {'sobol', 'morris', 'fast'}."
)
total_runs_per_loc = len(param_values)
# 2. SELECT LOCATIONS
all_locs = list(self.engines.keys())
if sample_locations is None or sample_locations >= len(all_locs):
selected_locs = all_locs
else:
selected_locs = random.sample(all_locs, sample_locations)
print(
f"[SA] Mode: {mode.upper()} | Params: {total_runs_per_loc} | Locations: {len(selected_locs)}"
)
print(f"[SA] Total Simulations: {total_runs_per_loc * len(selected_locs)}")
# 2. PREPARE TASKS
tasks = []
for run_idx, row in enumerate(param_values):
overrides = self._row_to_overrides(row, problem_dict)
for loc_id in selected_locs:
engine = self.engines[loc_id]
# Payload: ( (run_idx, loc_id), engine, overrides )
tasks.append(((run_idx, loc_id), engine, overrides, target_variable))
# 4. EXECUTE SIMULATIONS
results_map = {idx: {} for idx in range(total_runs_per_loc)}
failure_count = 0
with ProcessPoolExecutor(max_workers=n_workers) as executor:
futures = [executor.submit(_sa_worker_wrapper, task) for task in tasks]
iterator = as_completed(futures)
for future in tqdm(iterator, total=len(futures), desc="[SA] Simulating"):
try:
r_idx, l_id, val, err_msg = future.result()
results_map[r_idx][l_id] = val
if err_msg:
failure_count += 1
logging.debug(f"Failure: {err_msg}")
except Exception as e:
print(f"Critical Worker Error: {e}")
failure_count += 1
if failure_count > 0:
print(f"[SA] Warning: {failure_count} simulations failed.")
# 5. AGGREGATE & ANALYZE
final_dfs = []
if mode == "global":
# Mean across locations for each parameter set
y_aggregated = np.zeros(total_runs_per_loc)
for idx in range(total_runs_per_loc):
vals = list(results_map[idx].values())
y_aggregated[idx] = np.mean(vals) if vals else 0.0
print(f"[SA] Analyzing global (Mean) sensitivity...")
df = self._analyze_results(problem_dict, param_values, y_aggregated, method)
df["point_id"] = "global"
return df
elif mode == "local":
print(f"[SA] Analyzing local sensitivity...")
for loc_id in selected_locs:
# Extract vector for this specific location
y_local = np.zeros(total_runs_per_loc)
for idx in range(total_runs_per_loc):
y_local[idx] = results_map[idx].get(loc_id, 0.0)
df_loc = self._analyze_results(
problem_dict, param_values, y_local, method
)
df_loc["point_id"] = loc_id
final_dfs.append(df_loc)
return pd.concat(final_dfs, ignore_index=True)
def _analyze_results(self, problem, X, Y, method):
"""
Internal helper to run the specific SALib analysis function.
Args:
problem (dict): SALib problem dictionary.
X (np.ndarray): The matrix of parameter samples used.
Y (np.ndarray): The vector of simulation results.
method (str): Analysis method ('sobol', 'morris', 'fast').
Returns:
pd.DataFrame: A standardized dataframe of sensitivity indices.
"""
if method == "sobol":
Si = sobol.analyze(problem, Y, calc_second_order=False)
return pd.DataFrame(
{"Parameter": problem["names"], "ST": Si["ST"], "S1": Si["S1"]}
).sort_values("ST", ascending=False)
elif method == "morris":
Si = morris_analyzer.analyze(problem, X, Y, conf_level=0.95)
return pd.DataFrame(
{
"Parameter": problem["names"],
"mu_star": Si["mu_star"],
"sigma": Si["sigma"],
}
).sort_values("mu_star", ascending=False)
elif method == "fast":
Si = fast.analyze(problem, Y)
return pd.DataFrame(
{"Parameter": problem["names"], "ST": Si["ST"], "S1": Si["S1"]}
).sort_values("ST", ascending=False)
def _row_to_overrides(self, row, problem):
"""
Maps a flat row of parameter values to the WOFOST override dictionary structure.
"""
crop_params = {}
for i, name in enumerate(problem["names"]):
crop_params[name] = row[i]
return {"crop_params": crop_params, "soil_params": {}, "site_params": {}}
__init__(self, runner)
special
¶
Initializes the analyzer and pre-loads simulation engines.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
runner |
A WOFOST simulation runner object. Must implement
|
required |
Source code in cropengine/sensitivity.py
def __init__(self, runner):
"""
Initializes the analyzer and pre-loads simulation engines.
Args:
runner: A WOFOST simulation runner object. Must implement
`get_batch_rerunners()` or `get_rerunner()`.
"""
# DISABLE PCSE LOGGING
pcse_logger = logging.getLogger("pcse")
pcse_logger.handlers = []
pcse_logger.setLevel(logging.CRITICAL)
self.runner = runner
self.engines = {}
print("[SA] Loading simulation engines...")
if hasattr(runner, "get_batch_rerunners"):
self.engines = runner.get_batch_rerunners()
else:
self.engines = {0: runner.get_rerunner()}
run_analysis(self, problem_dict, method='sobol', n_samples=128, n_workers=4, target_variable='TWSO', mode='global', sample_locations=10, num_levels=4)
¶
Executes the Sensitivity Analysis workflow.
This method generates parameter samples using SALib, runs simulations across selected locations in parallel, aggregates the results based on the specified mode, and computes sensitivity indices.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
problem_dict |
dict |
The standard SALib problem definition dictionary. Example: { 'num_vars': 2, 'names': ['TSUM1', 'SPAN'], 'bounds': [[800, 1200], [28, 35]] } |
required |
method |
str |
The SA method to use. Options: - 'sobol': Variance-based (Best for interactions, computationally expensive). - 'morris': Elementary effects (Good for screening many parameters). - 'fast': Fourier Amplitude Sensitivity Test. Defaults to 'sobol'. |
'sobol' |
n_samples |
int |
The base sample size (N). Note: The actual number of simulations depends on the method: - Sobol: N * (2D + 2) - Morris: N * (D + 1) - FAST: N * D Where D is the number of parameters. Defaults to 128. |
128 |
n_workers |
int |
Number of parallel processes to use. Defaults to 4. |
4 |
target_variable |
str |
The output column from the simulation results to analyze (e.g., 'TWSO' for Total Dry Weight Storage Organs). Defaults to 'TWSO'. |
'TWSO' |
mode |
str |
The spatial aggregation strategy.
- 'global': Runs simulations on |
'global' |
sample_locations |
int |
The number of random locations to select from the batch runner to represent the region. If None, uses all available locations (warning: this can be very slow). Defaults to 10. |
10 |
num_levels |
int |
Number of grid levels (specific to 'morris' method). Defaults to 4. |
4 |
Returns:
| Type | Description |
|---|---|
pd.DataFrame |
A DataFrame containing the sensitivity indices. - For 'sobol': Columns [Parameter, ST, S1, point_id] - For 'morris': Columns [Parameter, mu_star, sigma, point_id] - For 'fast': Columns [Parameter, ST, S1, point_id] |
Exceptions:
| Type | Description |
|---|---|
ValueError |
If an unknown method is specified. |
Source code in cropengine/sensitivity.py
def run_analysis(
self,
problem_dict,
method="sobol",
n_samples=128,
n_workers=4,
target_variable="TWSO",
mode="global",
sample_locations=10,
num_levels=4,
):
"""
Executes the Sensitivity Analysis workflow.
This method generates parameter samples using SALib, runs simulations across
selected locations in parallel, aggregates the results based on the specified
mode, and computes sensitivity indices.
Args:
problem_dict (dict): The standard SALib problem definition dictionary.
Example:
{
'num_vars': 2,
'names': ['TSUM1', 'SPAN'],
'bounds': [[800, 1200], [28, 35]]
}
method (str, optional): The SA method to use. Options:
- 'sobol': Variance-based (Best for interactions, computationally expensive).
- 'morris': Elementary effects (Good for screening many parameters).
- 'fast': Fourier Amplitude Sensitivity Test.
Defaults to 'sobol'.
n_samples (int, optional): The base sample size (N).
**Note:** The actual number of simulations depends on the method:
- Sobol: N * (2D + 2)
- Morris: N * (D + 1)
- FAST: N * D
Where D is the number of parameters. Defaults to 128.
n_workers (int, optional): Number of parallel processes to use. Defaults to 4.
target_variable (str, optional): The output column from the simulation results
to analyze (e.g., 'TWSO' for Total Dry Weight Storage Organs).
Defaults to 'TWSO'.
mode (str, optional): The spatial aggregation strategy.
- 'global': Runs simulations on `sample_locations` for each parameter set,
averages the results to remove weather noise, and returns a single
sensitivity report for the entire region.
- 'local': Performs a full sensitivity analysis for EACH location
individually. Returns a DataFrame containing indices for every location.
Defaults to 'global'.
sample_locations (int, optional): The number of random locations to select
from the batch runner to represent the region. If None, uses all available
locations (warning: this can be very slow). Defaults to 10.
num_levels (int, optional): Number of grid levels (specific to 'morris' method).
Defaults to 4.
Returns:
pd.DataFrame: A DataFrame containing the sensitivity indices.
- For 'sobol': Columns [Parameter, ST, S1, point_id]
- For 'morris': Columns [Parameter, mu_star, sigma, point_id]
- For 'fast': Columns [Parameter, ST, S1, point_id]
Raises:
ValueError: If an unknown method is specified.
"""
# 1. GENERATE SAMPLES (Method Specific)
if method == "sobol":
param_values = saltelli.sample(
problem_dict, n_samples, calc_second_order=False
)
elif method == "morris":
param_values = morris_sampler.sample(
problem_dict, n_samples, num_levels=num_levels
)
elif method == "fast":
param_values = fast_sampler.sample(problem_dict, n_samples)
else:
raise ValueError(
f"Unknown method: {method}. Available methods are: {'sobol', 'morris', 'fast'}."
)
total_runs_per_loc = len(param_values)
# 2. SELECT LOCATIONS
all_locs = list(self.engines.keys())
if sample_locations is None or sample_locations >= len(all_locs):
selected_locs = all_locs
else:
selected_locs = random.sample(all_locs, sample_locations)
print(
f"[SA] Mode: {mode.upper()} | Params: {total_runs_per_loc} | Locations: {len(selected_locs)}"
)
print(f"[SA] Total Simulations: {total_runs_per_loc * len(selected_locs)}")
# 2. PREPARE TASKS
tasks = []
for run_idx, row in enumerate(param_values):
overrides = self._row_to_overrides(row, problem_dict)
for loc_id in selected_locs:
engine = self.engines[loc_id]
# Payload: ( (run_idx, loc_id), engine, overrides )
tasks.append(((run_idx, loc_id), engine, overrides, target_variable))
# 4. EXECUTE SIMULATIONS
results_map = {idx: {} for idx in range(total_runs_per_loc)}
failure_count = 0
with ProcessPoolExecutor(max_workers=n_workers) as executor:
futures = [executor.submit(_sa_worker_wrapper, task) for task in tasks]
iterator = as_completed(futures)
for future in tqdm(iterator, total=len(futures), desc="[SA] Simulating"):
try:
r_idx, l_id, val, err_msg = future.result()
results_map[r_idx][l_id] = val
if err_msg:
failure_count += 1
logging.debug(f"Failure: {err_msg}")
except Exception as e:
print(f"Critical Worker Error: {e}")
failure_count += 1
if failure_count > 0:
print(f"[SA] Warning: {failure_count} simulations failed.")
# 5. AGGREGATE & ANALYZE
final_dfs = []
if mode == "global":
# Mean across locations for each parameter set
y_aggregated = np.zeros(total_runs_per_loc)
for idx in range(total_runs_per_loc):
vals = list(results_map[idx].values())
y_aggregated[idx] = np.mean(vals) if vals else 0.0
print(f"[SA] Analyzing global (Mean) sensitivity...")
df = self._analyze_results(problem_dict, param_values, y_aggregated, method)
df["point_id"] = "global"
return df
elif mode == "local":
print(f"[SA] Analyzing local sensitivity...")
for loc_id in selected_locs:
# Extract vector for this specific location
y_local = np.zeros(total_runs_per_loc)
for idx in range(total_runs_per_loc):
y_local[idx] = results_map[idx].get(loc_id, 0.0)
df_loc = self._analyze_results(
problem_dict, param_values, y_local, method
)
df_loc["point_id"] = loc_id
final_dfs.append(df_loc)
return pd.concat(final_dfs, ignore_index=True)