intermediate backup
@@ -15,7 +15,7 @@ from .data_processing import (
     prepare_fold_data_and_loaders,
     TimeSeriesDataset
 )
-from .model import LSTMForecastLightningModule
+from forecasting_model.train.model import LSTMForecastLightningModule
 from .evaluation import (
     evaluate_fold_predictions,
     # Optionally expose the standalone evaluation utility if needed externally
@@ -5,9 +5,10 @@ import torch
 from torch.utils.data import Dataset, DataLoader
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from typing import Tuple, Generator, List, Optional, Union, Dict, Literal, Type
+import math # Add math import

 # Use relative import for utils within the package
-from .utils.config_model import DataConfig, FeatureConfig, TrainingConfig, EvaluationConfig, CrossValidationConfig
+from .utils.forecast_config_model import DataConfig, FeatureConfig, TrainingConfig, EvaluationConfig, CrossValidationConfig
 # Optional: Import wavelet library if needed later
 # import pywt

@@ -264,31 +265,39 @@ def engineer_features(df: pd.DataFrame, target_col: str, feature_config: Feature
         if isinstance(nan_handler, str):
             if nan_handler in ['ffill', 'bfill']:
                 fill_method = nan_handler
-                logger.debug(f"Filling NaNs in generated features using method: '{fill_method}'")
+                logger.debug(f"Selected NaN fill method for generated features: '{fill_method}'")
             elif nan_handler == 'mean':
                 logger.warning("NaN filling with 'mean' in generated features is applied globally here;"
                                " consider per-fold mean filling if lookahead is a concern.")
-                # Calculate mean only on the slice provided, potentially leaking info if slice includes val/test
-                # Better to use ffill/bfill here or handle after split
-                fill_value = features_df[feature_cols_generated].mean() # Calculate mean per feature column
-                logger.debug("Filling NaNs in generated features using column means.")
+                fill_value = features_df[feature_cols_generated].mean()
+                logger.debug("Selected NaN fill method: column means.")
             else:
                 logger.warning(f"Unsupported string fill_nan method '{nan_handler}' for generated features. Using 'ffill'.")
-                fill_method = 'ffill'
+                fill_method = 'ffill' # Default to ffill if unsupported string
         elif isinstance(nan_handler, (int, float)):
             fill_value = float(nan_handler)
-            logger.debug(f"Filling NaNs in generated features with value: {fill_value}")
+            logger.debug(f"Selected NaN fill value for generated features: {fill_value}")
         else:
             logger.warning(f"Invalid fill_nan type: {type(nan_handler)}. NaNs in features may remain.")

-        # Apply filling only to generated feature columns
-        if fill_method:
-            features_df[feature_cols_generated] = features_df[feature_cols_generated].fillna(method=fill_method)
-            if fill_method == 'ffill':
-                features_df[feature_cols_generated] = features_df[feature_cols_generated].fillna(method='bfill')
+        # Apply filling only to generated feature columns using recommended methods
+        if fill_method == 'ffill':
+            logger.debug("Applying .ffill() to generated features...")
+            features_df[feature_cols_generated] = features_df[feature_cols_generated].ffill()
+            # Apply bfill afterwards to handle any NaNs remaining at the very beginning
+            logger.debug("Applying .bfill() to handle any remaining NaNs at the start...")
+            features_df[feature_cols_generated] = features_df[feature_cols_generated].bfill()
+        elif fill_method == 'bfill':
+            logger.debug("Applying .bfill() to generated features...")
+            features_df[feature_cols_generated] = features_df[feature_cols_generated].bfill()
+            # Optionally apply ffill after bfill if you need to fill trailing NaNs (less common)
+            # features_df[feature_cols_generated] = features_df[feature_cols_generated].ffill()
         elif fill_value is not None:
+            # fillna with Series/dict for column-wise mean, or scalar for constant value
+            logger.debug(f"Applying .fillna(value={fill_value}) to generated features...")
             features_df[feature_cols_generated] = features_df[feature_cols_generated].fillna(value=fill_value)
+        # No else needed, if fill_method and fill_value are None, no filling happens

     else:
         logger.warning("`fill_nan` is None. NaNs generated by feature engineering may remain.")

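For context, a minimal sketch (dummy frame, hypothetical column name) of the ffill-then-bfill pattern the new code applies to the generated feature columns:

```python
import numpy as np
import pandas as pd

# Stand-in for features_df: 'lag_1' has NaNs at the start (from lagging)
# and in the middle (from a gap in the raw data).
features_df = pd.DataFrame({"lag_1": [np.nan, np.nan, 1.0, np.nan, 3.0, 4.0]})
feature_cols_generated = ["lag_1"]

# .ffill() propagates the last seen value forward; .bfill() then catches the
# leading NaNs that ffill cannot fill because nothing precedes them.
features_df[feature_cols_generated] = features_df[feature_cols_generated].ffill()
features_df[feature_cols_generated] = features_df[feature_cols_generated].bfill()
print(features_df["lag_1"].tolist())  # [1.0, 1.0, 1.0, 1.0, 3.0, 4.0]
```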
@@ -366,36 +375,31 @@ class TimeSeriesCrossValidationSplitter:

         # Estimate if None
         elif self.initial_train_size is None:
-            min_samples_per_split_step = 2 # Heuristic minimum samples for val+test in one step
-            # Estimate val/test based on *potential* train size (crude)
-            # Assume train is roughly (1 - val - test) fraction for estimation
-            estimated_train_frac = max(0.1, 1.0 - self.val_frac - self.test_frac) # Ensure non-zero
-            estimated_train_n = int(self.n_samples * estimated_train_frac)
-            val_test_size_per_step = max(min_samples_per_split_step, int(estimated_train_n * (self.val_frac + self.test_frac)))
+            logger.info("Estimating fixed train size based on n_splits, val_frac, test_frac.")
+            # Estimate based on the total space needed for all splits:
+            # n_samples >= fixed_train_n + val_size + test_size + (n_splits - 1) * step_size
+            # n_samples >= fixed_train_n + int(fixed_train_n*val_frac) + n_splits * int(fixed_train_n*test_frac)
+            # n_samples >= fixed_train_n * (1 + val_frac + n_splits * test_frac)
+            # fixed_train_n <= n_samples / (1 + val_frac + n_splits * test_frac)

-            # Tentative initial train size is total minus one val/test block
-            fixed_train_n_est = self.n_samples - val_test_size_per_step
+            denominator = 1.0 + self.val_frac + self.n_splits * self.test_frac
+            if denominator <= 1.0: # Avoid division by zero or non-positive, and ensure train frac < 1
+                raise ValueError(f"Cannot estimate initial_train_size. Combination of val_frac ({self.val_frac}), "
+                                 f"test_frac ({self.test_frac}), and n_splits ({self.n_splits}) is invalid (denominator {denominator:.2f} <= 1.0).")

-            # Basic sanity checks
-            if fixed_train_n_est <= 0:
-                raise ValueError("Could not estimate a valid initial_train_size (<= 0). Please specify it or check CV fractions.")
-            # Need at least 1 sample for train, val, test each theoretically
-            est_val_size = max(1, int(fixed_train_n_est * self.val_frac))
-            est_test_size = max(1, int(fixed_train_n_est * self.test_frac))
-            if fixed_train_n_est + est_val_size + est_test_size > self.n_samples:
-                # If the simple estimate is too large, reduce it more drastically
-                # Try setting train size = 50% and see if val/test fit?
-                fixed_train_n_est = int(self.n_samples * 0.5)
-                est_val_size = max(1, int(fixed_train_n_est * self.val_frac))
-                est_test_size = max(1, int(fixed_train_n_est * self.test_frac))
-                if fixed_train_n_est <= 0 or (fixed_train_n_est + est_val_size + est_test_size > self.n_samples):
-                    raise ValueError("Could not estimate a valid initial_train_size. Data too small relative to val/test fractions? Please specify initial_train_size.")
+            estimated_size = int(self.n_samples / denominator)

-            logger.warning(f"initial_train_size not set, estimated fixed train size for rolling window: {fixed_train_n_est}. "
-                           "This is a heuristic; viability depends on n_splits and step size. Validation happens in split().")
-            return fixed_train_n_est
+            # Add a sanity check: ensure estimated size is reasonably large
+            min_required_for_features = 1 # Placeholder - ideally get from FeatureConfig if possible, but complex here
+            if estimated_size < min_required_for_features:
+                raise ValueError(f"Estimated fixed train size ({estimated_size}) is too small. "
+                                 f"Check CV config (n_splits={self.n_splits}, val_frac={self.val_frac}, test_frac={self.test_frac}) "
+                                 f"relative to total samples ({self.n_samples}). Consider specifying initial_train_size manually.")
+
+            logger.info(f"Estimated fixed training window size: {estimated_size}")
+            return estimated_size
         else:
-            raise ValueError(f"Invalid initial_train_size: {self.initial_train_size}")
+            raise ValueError(f"Invalid initial_train_size type or value: {self.initial_train_size}")


     def split(self) -> Generator[Tuple[np.ndarray, np.ndarray, np.ndarray], None, None]:
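To make the derivation in the new comments concrete, a worked example with made-up numbers:

```python
# Assumed CV config: 1000 samples, 10% val, 10% test per split, 5 splits.
n_samples, val_frac, test_frac, n_splits = 1000, 0.10, 0.10, 5

# From n_samples >= fixed_train_n * (1 + val_frac + n_splits * test_frac):
denominator = 1.0 + val_frac + n_splits * test_frac  # 1.6
estimated_size = int(n_samples / denominator)        # 625
print(estimated_size)  # 625 -> each split then gets ~62 val and ~62 test samples
```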
@@ -483,28 +487,31 @@ class TimeSeriesDataset(Dataset):
     """
     PyTorch Dataset for time series forecasting.

-    Takes a NumPy array (features + target), sequence length, and forecast horizon,
-    and returns (input_sequence, target_sequence) tuples. Compatible with PyTorch
-    DataLoaders used by PyTorch Lightning.
+    Takes a NumPy array (features + target), sequence length, and a list of
+    specific forecast horizons. Returns (input_sequence, target_vector) tuples,
+    where target_vector contains the target values at the specified future steps.
     """
-    def __init__(self, data_array: np.ndarray, sequence_length: int, forecast_horizon: int, target_col_index: int = 0):
+    def __init__(self, data_array: np.ndarray, sequence_length: int, forecast_horizon: List[int], target_col_index: int = 0):
         """
         Args:
             data_array: Numpy array of shape (n_samples, n_features).
                         Assumes the target variable is one of the columns.
             sequence_length: Length of the input sequence (lookback window).
-            forecast_horizon: Number of steps ahead to predict.
+            forecast_horizon: List of specific steps ahead to predict (e.g., [1, 6, 12]).
             target_col_index: Index of the target column in data_array. Defaults to 0.
         """
         if sequence_length <= 0:
             raise ValueError("sequence_length must be positive.")
-        if forecast_horizon <= 0:
-            raise ValueError("forecast_horizon must be positive.")
+        if not forecast_horizon or not isinstance(forecast_horizon, list) or any(h <= 0 for h in forecast_horizon):
+            raise ValueError("forecast_horizon must be a non-empty list of positive integers.")
         if data_array.ndim != 2:
             raise ValueError(f"data_array must be 2D, but got shape {data_array.shape}")
-        min_len_required = sequence_length + forecast_horizon
+
+        self.max_horizon = max(forecast_horizon) # Find the furthest point needed
+
+        min_len_required = sequence_length + self.max_horizon
         if min_len_required > data_array.shape[0]:
-            raise ValueError(f"sequence_length ({sequence_length}) + forecast_horizon ({forecast_horizon}) = {min_len_required} "
+            raise ValueError(f"sequence_length ({sequence_length}) + max_horizon ({self.max_horizon}) = {min_len_required} "
                              f"exceeds total samples provided ({data_array.shape[0]})")
         if not (0 <= target_col_index < data_array.shape[1]):
             raise ValueError(f"target_col_index ({target_col_index}) out of bounds for data with {data_array.shape[1]} columns.")
@@ -512,32 +519,37 @@ class TimeSeriesDataset(Dataset):

         self.data = torch.tensor(data_array, dtype=torch.float32)
         self.sequence_length = sequence_length
-        self.forecast_horizon = forecast_horizon
+        self.forecast_horizon_list = sorted(forecast_horizon)
         self.target_col_index = target_col_index
         self.n_samples = data_array.shape[0]
         self.n_features = data_array.shape[1]

         logger.debug(f"TimeSeriesDataset created: data shape={self.data.shape}, "
-                     f"seq_len={self.sequence_length}, forecast_horizon={self.forecast_horizon}, "
-                     f"target_idx={self.target_col_index}")
+                     f"seq_len={self.sequence_length}, forecast_horizons={self.forecast_horizon_list}, "
+                     f"max_horizon={self.max_horizon}, target_idx={self.target_col_index}")

     def __len__(self) -> int:
         """Returns the total number of sequences that can be generated."""
-        return self.n_samples - self.sequence_length - self.forecast_horizon + 1
+        return self.n_samples - self.sequence_length - self.max_horizon + 1

     def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
         """
-        Returns a single (input_sequence, target_sequence) pair.
+        Returns a single (input_sequence, target_vector) pair.
+        Target vector contains values for the specified forecast horizons.
         """
         if not (0 <= idx < len(self)):
             raise IndexError(f"Index {idx} out of bounds for dataset with length {len(self)}")

         input_start = idx
         input_end = idx + self.sequence_length
-        input_sequence = self.data[input_start:input_end, :]
-        target_start = input_end
-        target_end = target_start + self.forecast_horizon
-        target_sequence = self.data[target_start:target_end, self.target_col_index]
-        return input_sequence, target_sequence
+        input_sequence = self.data[input_start:input_end, :] # Shape: (seq_len, n_features)
+
+        # Calculate indices for each horizon relative to the end of the input sequence
+        # Horizon h corresponds to index: input_end + h - 1
+        target_indices = [input_end + h - 1 for h in self.forecast_horizon_list]
+        target_vector = self.data[target_indices, self.target_col_index] # Shape: (len(forecast_horizon_list),)
+
+        return input_sequence, target_vector

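A quick sketch of the horizon-to-index arithmetic in the new `__getitem__`, with made-up sizes:

```python
# Assumed: sequence_length = 4, horizons = [1, 6, 12], sample idx = 0.
sequence_length = 4
forecast_horizon_list = [1, 6, 12]
idx = 0

input_end = idx + sequence_length  # 4: the input covers rows 0..3
# Horizon h is h steps after the last input row, i.e. row input_end + h - 1.
target_indices = [input_end + h - 1 for h in forecast_horizon_list]
print(target_indices)  # [4, 9, 15]
# __len__ uses max(horizons): n_samples - sequence_length - 12 + 1 samples,
# so even the last sample's furthest target index stays in bounds.
```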
 # --- Data Preparation ---
 def prepare_fold_data_and_loaders(
@@ -576,6 +588,7 @@ def prepare_fold_data_and_loaders(
         feature_config: Configuration for feature engineering.
         train_config: Configuration for training (used for batch size, device hints).
         eval_config: Configuration for evaluation (used for batch size).
+

     Returns:
         Tuple containing:
@@ -598,13 +611,25 @@ def prepare_fold_data_and_loaders(
     if feature_config.lags:
         max_lookback = max(max_lookback, max(feature_config.lags))
     if feature_config.rolling_window_sizes:
-        max_lookback = max(max_lookback, max(feature_config.rolling_window_sizes) -1 )
-    max_history_needed = max(max_lookback, feature_config.sequence_length)
+        max_lookback = max(max_lookback, max(feature_config.rolling_window_sizes) -1)
+
+    # Also need history for the input sequence length and max target horizon
+    max_horizon_needed = max(feature_config.forecast_horizon) if feature_config.forecast_horizon else 0
+    # Max history needed is max of lookback for features OR (sequence_length + max_horizon - 1) for targets/inputs
+    # Correct logic: Need `sequence_length` history for input, and `max_horizon` steps *after* the train data for targets/evaluation.
+    # The slicing needs to ensure enough data *before* train_idx[0] for feature lookback *and* sequence_length.
+    # Max history *before* the start of the training set
+    max_history_needed_before_train = max(max_lookback, feature_config.sequence_length)
+
+    slice_start_idx = max(0, train_idx[0] - max_history_needed_before_train)
+    # The end index needs to cover the test set PLUS the maximum horizon needed for the last test target
+    slice_end_idx = test_idx[-1] + max_horizon_needed # Go up to the last needed target
+
+    # Ensure end index is within bounds
+    slice_end_idx = min(slice_end_idx + 1, len(full_df)) # +1 because iloc is exclusive

-    slice_start_idx = max(0, train_idx[0] - max_history_needed)
-    slice_end_idx = test_idx[-1] + 1
     if slice_start_idx >= slice_end_idx:
-        raise ValueError(f"Calculated slice start ({slice_start_idx}) >= slice end ({slice_end_idx}). Check indices.")
+        raise ValueError(f"Calculated slice start ({slice_start_idx}) >= slice end ({slice_end_idx}). Check indices and horizon.")

     fold_data_slice = full_df.iloc[slice_start_idx:slice_end_idx]
     logger.debug(f"Required data slice for fold: indices {slice_start_idx} to {slice_end_idx-1} "
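A toy calculation of the new slice bounds (all numbers hypothetical):

```python
# Assumed: lags up to 24, rolling window 12, sequence_length 48, horizons [1, 6, 12],
# first train index 500, last test index 900, 1000 rows total.
max_lookback = max(24, 12 - 1)                                   # 24
max_horizon_needed = max([1, 6, 12])                             # 12
max_history_needed_before_train = max(max_lookback, 48)          # 48

slice_start_idx = max(0, 500 - max_history_needed_before_train)  # 452
# Cover the last test target at 900 + 12; +1 because iloc is exclusive.
slice_end_idx = min(900 + max_horizon_needed + 1, 1000)          # 913
```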
@@ -709,22 +734,38 @@ def prepare_fold_data_and_loaders(

     input_size = train_data_scaled.shape[1]

+    # --- Ensure final data arrays are float32 for PyTorch ---
+    try:
+        # Explicitly convert to float32 AFTER scaling (or non-scaling)
+        train_data_final = train_data_scaled.astype(np.float32)
+        val_data_final = val_data_scaled.astype(np.float32)
+        test_data_final = test_data_scaled.astype(np.float32)
+        logger.debug("Ensured final data arrays are float32.")
+    except ValueError as e:
+        # This might happen if data cannot be safely cast (e.g., strings remain unexpectedly)
+        logger.error(f"Failed to convert data arrays to float32 before creating Tensors: {e}", exc_info=True)
+        # Consider adding more debug info here if it fails, e.g.:
+        # logger.debug(f"Data types in train_df before conversion: \n{train_df.dtypes}")
+        raise ValueError("Data could not be converted to numeric type (float32) for PyTorch.") from e


     # 6. Dataset Instantiation
     logger.debug("Creating TimeSeriesDataset instances for the fold.")
     try:
+        # Use the explicitly converted arrays
         train_dataset = TimeSeriesDataset(
-            train_data_scaled, feature_config.sequence_length, feature_config.forecast_horizon, target_col_index=target_col_index_in_features
+            train_data_final, feature_config.sequence_length, feature_config.forecast_horizon, target_col_index=target_col_index_in_features
         )
         val_dataset = TimeSeriesDataset(
-            val_data_scaled, feature_config.sequence_length, feature_config.forecast_horizon, target_col_index=target_col_index_in_features
+            val_data_final, feature_config.sequence_length, feature_config.forecast_horizon, target_col_index=target_col_index_in_features
         )
         test_dataset = TimeSeriesDataset(
-            test_data_scaled, feature_config.sequence_length, feature_config.forecast_horizon, target_col_index=target_col_index_in_features
+            test_data_final, feature_config.sequence_length, feature_config.forecast_horizon, target_col_index=target_col_index_in_features
         )
     except ValueError as e:
         logger.error(f"Error creating TimeSeriesDataset: {e}")
-        logger.error(f"Shapes fed to Dataset: Train={train_data_scaled.shape}, Val={val_data_scaled.shape}, Test={test_data_scaled.shape}")
-        logger.error(f"SeqLen={feature_config.sequence_length}, Horizon={feature_config.forecast_horizon}")
+        logger.error(f"Shapes fed to Dataset: Train={train_data_final.shape}, Val={val_data_final.shape}, Test={test_data_final.shape}")
+        logger.error(f"SeqLen={feature_config.sequence_length}, Horizons={feature_config.forecast_horizon}")
         raise

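For reference, a minimal end-to-end use of the updated dataset (dummy data, hypothetical sizes):

```python
import numpy as np
from torch.utils.data import DataLoader

data = np.random.rand(200, 5).astype(np.float32)  # 200 rows, 5 features, target in column 0
dataset = TimeSeriesDataset(data, sequence_length=48,
                            forecast_horizon=[1, 6, 12], target_col_index=0)
loader = DataLoader(dataset, batch_size=32, shuffle=False)

x, y = next(iter(loader))
print(x.shape, y.shape)  # torch.Size([32, 48, 5]) torch.Size([32, 3])
```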
@@ -748,4 +789,69 @@ def prepare_fold_data_and_loaders(

     logger.info("Data loaders prepared successfully for the fold.")

-    return train_loader, val_loader, test_loader, target_scaler, input_size
+    return train_loader, val_loader, test_loader, target_scaler, input_size
+
+# --- Classic Train/Val/Test Split ---
+
+def split_data_classic(
+    n_samples: int,
+    val_frac: float,
+    test_frac: float,
+    start_from_end: bool = True
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Splits data indices into one train, one validation, and one test set based on fractions.
+
+    Args:
+        n_samples: Total number of samples in the dataset.
+        val_frac: Fraction of the *total* data to use for validation.
+        test_frac: Fraction of the *total* data to use for testing.
+        start_from_end: If True (default), test and validation sets are taken from the end
+                        of the series. If False, they are taken after the initial training block.
+                        Default is True for typical time series evaluation.
+
+    Returns:
+        Tuple of (train_indices, val_indices, test_indices).
+
+    Raises:
+        ValueError: If fractions are invalid or sum to >= 1.
+    """
+    if not (0 < val_frac < 1):
+        raise ValueError(f"val_frac must be between 0 and 1, got {val_frac}")
+    if not (0 < test_frac < 1):
+        raise ValueError(f"test_frac must be between 0 and 1, got {test_frac}")
+    if val_frac + test_frac >= 1:
+        raise ValueError(f"Sum of val_frac ({val_frac}) and test_frac ({test_frac}) must be less than 1.")
+
+    test_size = math.ceil(n_samples * test_frac) # Use ceil to ensure at least one sample if frac is tiny
+    val_size = math.ceil(n_samples * val_frac)
+    train_size = n_samples - val_size - test_size
+
+    if train_size <= 0:
+        raise ValueError(f"Calculated train_size ({train_size}) is not positive. Adjust fractions or increase data.")
+    if val_size <= 0:
+        raise ValueError("Calculated val_size is not positive.")
+    if test_size <= 0:
+        raise ValueError("Calculated test_size is not positive.")
+
+    indices = np.arange(n_samples)
+
+    if start_from_end:
+        train_indices = indices[:train_size]
+        val_indices = indices[train_size:train_size + val_size]
+        test_indices = indices[train_size + val_size:]
+        # Adjust if ceil caused slight overallocation in test
+        test_indices = test_indices[:test_size]
+    else:
+        # Less common: place val/test directly after train
+        train_indices = indices[:train_size]
+        val_indices = indices[train_size:train_size + val_size]
+        test_indices = indices[train_size + val_size:train_size + val_size + test_size]
+        # Remaining data is unused in this scenario
+
+    logger.info(f"Classic split: Train indices {train_indices[0]}-{train_indices[-1]} (size {len(train_indices)}), "
+                f"Val indices {val_indices[0]}-{val_indices[-1]} (size {len(val_indices)}), "
+                f"Test indices {test_indices[0]}-{test_indices[-1]} (size {len(test_indices)})")
+
+    return train_indices, val_indices, test_indices
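As a sanity check on the ceil-based sizing, with 100 samples and 15% val/test:

```python
import math
import numpy as np

n_samples, val_frac, test_frac = 100, 0.15, 0.15
test_size = math.ceil(n_samples * test_frac)   # 15
val_size = math.ceil(n_samples * val_frac)     # 15
train_size = n_samples - val_size - test_size  # 70

indices = np.arange(n_samples)
train_indices = indices[:train_size]                        # rows 0..69
val_indices = indices[train_size:train_size + val_size]     # rows 70..84
test_indices = indices[train_size + val_size:][:test_size]  # rows 85..99
```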
@@ -1,24 +1,22 @@
 import logging
 import os
 from pathlib import Path # Added
 import numpy as np
 import torch
 import torchmetrics
 from torch.utils.data import DataLoader
 from sklearn.preprocessing import StandardScaler, MinMaxScaler # For type hinting target_scaler
-from typing import Dict, Any, Optional, Union, List, Tuple
-# import matplotlib.pyplot as plt # No longer needed directly
-# import seaborn as sns # No longer needed directly
+from typing import Dict, Optional, Union, List
+import pandas as pd # For time index type hint

 # Assuming config_model and io.plotting are accessible
-from forecasting_model.utils.config_model import EvaluationConfig
-from forecasting_model.io.plotting import ( # Import the plotting utilities
+from forecasting_model.utils.forecast_config_model import EvaluationConfig
+from forecasting_model.train.model import LSTMForecastLightningModule
+from forecasting_model.io.plotting import (
     setup_plot_style,
     save_plot,
     create_time_series_plot,
     create_scatter_plot,
     create_residuals_plot,
-    create_residuals_distribution_plot
+    create_residuals_distribution_plot,
 )

@@ -82,90 +80,101 @@ def calculate_rmse_np(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     return float(rmse)


-# --- Plotting Functions (Utilities) ---
-# REMOVED - These are now imported from io.plotting


 # --- Fold Evaluation Function ---

 def evaluate_fold_predictions(
-    y_true_scaled: np.ndarray,
-    y_pred_scaled: np.ndarray,
+    y_true_scaled: np.ndarray, # Shape: (n_samples, len(horizons))
+    y_pred_scaled: np.ndarray, # Shape: (n_samples, len(horizons))
     target_scaler: Union[StandardScaler, MinMaxScaler, None],
     eval_config: EvaluationConfig,
-    fold_num: int,
-    output_dir: str, # Base output directory (e.g., output/cv_results)
-    time_index: Optional[np.ndarray] = None # Optional: Pass time index for x-axis
+    fold_num: int, # Zero-based fold index
+    output_dir: str, # Base output directory
+    plot_subdir: Optional[str] = "plots",
+    # time_index: Optional[Union[np.ndarray, pd.Index]] = None, # OLD: Index for samples
+    prediction_time_index: Optional[pd.Index] = None, # Index corresponding to the prediction times (n_samples,)
+    forecast_horizons: Optional[List[int]] = None, # The list of horizons predicted (e.g., [1, 6, 12])
+    plot_title_prefix: Optional[str] = None
 ) -> Dict[str, float]:
     """
-    Processes prediction results for a fold's test set using torchmetrics.
+    Processes prediction results (multiple horizons) for a fold or ensemble.

-    Takes scaled predictions and targets, inverse transforms them,
-    calculates final metrics (MAE, RMSE) using torchmetrics.functional,
-    and generates evaluation plots using utilities from io.plotting. Assumes
-    model inference is already done.
+    Takes scaled predictions and targets (shape: samples, num_horizons),
+    inverse transforms them, calculates overall metrics (MAE, RMSE) across all horizons,
+    and generates evaluation plots *for the first specified horizon only*.

     Args:
-        y_true_scaled: Numpy array of scaled ground truth targets (n_samples, horizon).
-        y_pred_scaled: Numpy array of scaled model predictions (n_samples, horizon).
-        target_scaler: The scaler fitted on the target variable during training. Needed
-                       for inverse transforming to original scale. Can be None.
-        eval_config: Configuration object for evaluation parameters (e.g., plotting).
-        fold_num: The current fold number (e.g., 0, 1, ...).
-        output_dir: The base directory to save fold-specific outputs (plots, metrics).
-        time_index: Optional array representing the time index for the test set,
-                    used for x-axis in time-based plots. If None, uses integer indices.
+        y_true_scaled: Numpy array of scaled ground truth targets (n_samples, len(horizons)).
+        y_pred_scaled: Numpy array of scaled model predictions (n_samples, len(horizons)).
+        target_scaler: The scaler fitted on the target variable.
+        eval_config: Configuration object for evaluation parameters.
+        fold_num: The current fold number (zero-based or -1 for classic).
+        output_dir: The base directory to save outputs.
+        plot_subdir: Specific subdirectory under output_dir for plots.
+        prediction_time_index: Pandas Index representing the time for each prediction point (n_samples,).
+                               Required for meaningful time plots.
+        forecast_horizons: List of horizons predicted (e.g., [1, 6, 12]). Required for plotting.
+        plot_title_prefix: Optional string to prepend to plot titles.

     Returns:
         Dictionary containing evaluation metrics {'MAE': value, 'RMSE': value} on the
-        original scale. Metrics will be NaN if inverse transform or calculation fails.
-
-    Raises:
-        ValueError: If input shapes are inconsistent or required scaler is missing.
+        original scale, calculated *across all predicted horizons*.
     """
-    logger.info(f"Processing evaluation results for Fold {fold_num + 1}...")
-    fold_id = fold_num + 1 # Use 1-based indexing for reporting/filenames
+    fold_id_str = f"Fold {fold_num + 1}" if fold_num >= 0 else "Classic Run"
+    eval_context_str = f"{plot_title_prefix} {fold_id_str}" if plot_title_prefix else fold_id_str
+    logger.info(f"Processing evaluation results for: {eval_context_str}")

     if y_true_scaled.shape != y_pred_scaled.shape:
-        raise ValueError(f"Shape mismatch between targets and predictions: "
+        raise ValueError(f"Shape mismatch between targets and predictions for {eval_context_str}: "
                          f"{y_true_scaled.shape} vs {y_pred_scaled.shape}")
     if y_true_scaled.ndim != 2:
-        raise ValueError(f"Expected 2D arrays for targets and predictions, got {y_true_scaled.ndim}D")
+        raise ValueError(f"Expected 2D arrays (samples, num_horizons) for {eval_context_str}, got {y_true_scaled.ndim}D")

-    n_samples, horizon = y_true_scaled.shape
-    logger.debug(f"Processing {n_samples} samples with horizon {horizon}.")
+    n_samples, n_horizons = y_true_scaled.shape
+    logger.debug(f"Processing {n_samples} samples across {n_horizons} horizons for {eval_context_str}.")

     # --- Inverse Transform (Outputs NumPy) ---
-    y_true_flat_scaled = y_true_scaled.reshape(-1, 1)
-    y_pred_flat_scaled = y_pred_scaled.reshape(-1, 1)
+    # Flatten the multi-horizon arrays for the scaler (which expects (N, 1))
+    y_true_flat_scaled = y_true_scaled.reshape(-1, 1) # Shape: (n_samples * n_horizons, 1)
+    y_pred_flat_scaled = y_pred_scaled.reshape(-1, 1) # Shape: (n_samples * n_horizons, 1)

     y_true_inv_np: np.ndarray
     y_pred_inv_np: np.ndarray

     if target_scaler is not None:
         try:
-            logger.debug("Inverse transforming predictions and targets.")
-            y_true_inv_np = target_scaler.inverse_transform(y_true_flat_scaled)
-            y_pred_inv_np = target_scaler.inverse_transform(y_pred_flat_scaled)
-            # Flatten NumPy arrays for metric calculation and plotting
-            y_true_np = y_true_inv_np.flatten()
-            y_pred_np = y_pred_inv_np.flatten()
+            logger.debug(f"Inverse transforming predictions and targets for {eval_context_str}.")
+            y_true_inv_flat = target_scaler.inverse_transform(y_true_flat_scaled)
+            y_pred_inv_flat = target_scaler.inverse_transform(y_pred_flat_scaled)
+            # Reshape back to (n_samples, n_horizons) for potential per-horizon analysis later
+            y_true_inv_np = y_true_inv_flat.reshape(n_samples, n_horizons)
+            y_pred_inv_np = y_pred_inv_flat.reshape(n_samples, n_horizons)
         except Exception as e:
-            logger.error(f"Error during inverse scaling for Fold {fold_id}: {e}", exc_info=True)
+            logger.error(f"Error during inverse scaling for {eval_context_str}: {e}", exc_info=True)
             logger.error("Metrics calculation will be skipped due to inverse transform failure.")
             return {'MAE': np.nan, 'RMSE': np.nan}
     else:
-        logger.info("No target scaler provided, assuming inputs are already on original scale.")
-        # Flatten NumPy arrays for metric calculation and plotting
-        y_true_np = y_true_flat_scaled.flatten()
-        y_pred_np = y_pred_flat_scaled.flatten()
-
-    # --- Calculate Metrics using torchmetrics.functional ---
-    metrics: Dict[str, float] = {'MAE': np.nan, 'RMSE': np.nan} # Initialize with NaN
+        logger.info(f"No target scaler provided for {eval_context_str}, assuming inputs are on original scale.")
+        y_true_inv_np = y_true_scaled # Keep original shape (n_samples, n_horizons)
+        y_pred_inv_np = y_pred_scaled # Keep original shape

+    # --- Calculate Metrics using torchmetrics.functional (Overall across all horizons) ---
+    metrics: Dict[str, float] = {'MAE': np.nan, 'RMSE': np.nan}
     try:
-        if len(y_true_np) > 0: # Check if data exists after potential failures
-            y_true_tensor = torch.from_numpy(y_true_np).float().cpu()
-            y_pred_tensor = torch.from_numpy(y_pred_np).float().cpu()
+        # Flatten arrays for overall metrics calculation
+        y_true_flat_for_metrics = y_true_inv_np.flatten()
+        y_pred_flat_for_metrics = y_pred_inv_np.flatten()
+
+        valid_mask = ~np.isnan(y_true_flat_for_metrics) & ~np.isnan(y_pred_flat_for_metrics)
+        if np.sum(valid_mask) < len(y_true_flat_for_metrics):
+            nan_count = len(y_true_flat_for_metrics) - np.sum(valid_mask)
+            logger.warning(f"{nan_count} NaN values found in predictions/targets (across all horizons) for {eval_context_str}. These will be excluded from metrics.")
+
+        if np.sum(valid_mask) > 0:
+            y_true_tensor = torch.from_numpy(y_true_flat_for_metrics[valid_mask]).float().cpu()
+            y_pred_tensor = torch.from_numpy(y_pred_flat_for_metrics[valid_mask]).float().cpu()

             mae_tensor = torchmetrics.functional.mean_absolute_error(y_pred_tensor, y_true_tensor)
             mse_tensor = torchmetrics.functional.mean_squared_error(y_pred_tensor, y_true_tensor)
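The reshape round-trip the new code performs around the scaler, sketched with a StandardScaler and dummy shapes:

```python
import numpy as np
from sklearn.preprocessing import StandardScaler

n_samples, n_horizons = 4, 3
scaler = StandardScaler().fit(np.random.rand(50, 1))  # fitted on the 1-column target
y_pred_scaled = np.random.randn(n_samples, n_horizons)

# sklearn scalers expect (N, 1): flatten, inverse-transform, restore (n_samples, n_horizons).
y_pred_inv = scaler.inverse_transform(
    y_pred_scaled.reshape(-1, 1)
).reshape(n_samples, n_horizons)
print(y_pred_inv.shape)  # (4, 3)
```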
@@ -174,82 +183,95 @@ def evaluate_fold_predictions(
             metrics['MAE'] = mae_tensor.item()
             metrics['RMSE'] = rmse_tensor.item()

-            logger.info(f"Fold {fold_id} Test Set Metrics (torchmetrics): MAE={metrics['MAE']:.4f}, RMSE={metrics['RMSE']:.4f}")
+            logger.info(f"{eval_context_str} Test Set Overall Metrics (torchmetrics): MAE={metrics['MAE']:.4f}, RMSE={metrics['RMSE']:.4f} (across all horizons)")
         else:
-            logger.warning(f"Skipping metric calculation for Fold {fold_id} due to empty data after inverse transform.")
+            logger.warning(f"Skipping metric calculation for {eval_context_str} due to no valid (non-NaN) data points.")

     except Exception as e:
-        logger.error(f"Failed to calculate metrics using torchmetrics for Fold {fold_id}: {e}", exc_info=True)
-        # metrics already initialized to NaN
+        logger.error(f"Failed to calculate overall metrics using torchmetrics for {eval_context_str}: {e}", exc_info=True)


-    # --- Generate Plots (Optional - uses plotting utilities) ---
-    if eval_config.save_plots and len(y_true_np) > 0:
-        logger.info(f"Generating evaluation plots for Fold {fold_id}...")
-        # Define plot directory and setup style
-        fold_plot_dir = Path(output_dir) / f"fold_{fold_id:02d}" / "plots"
-        setup_plot_style() # Apply consistent styling
+    # --- Generate Plots (Optional - Focus on FIRST horizon) ---
+    if eval_config.save_plots and np.sum(valid_mask) > 0:
+        if forecast_horizons is None or not forecast_horizons:
+            logger.warning(f"Skipping plot generation for {eval_context_str}: `forecast_horizons` list not provided.")
+        elif prediction_time_index is None or len(prediction_time_index) != n_samples:
+            logger.warning(f"Skipping plot generation for {eval_context_str}: `prediction_time_index` is missing or has incorrect length ({len(prediction_time_index) if prediction_time_index is not None else 'None'} != {n_samples}).")
+        else:
+            logger.info(f"Generating evaluation plots for {eval_context_str} (using first horizon H+{forecast_horizons[0]} only)...")
+            base_plot_dir = Path(output_dir)
+            fold_plot_dir = base_plot_dir / plot_subdir if plot_subdir else base_plot_dir
+            setup_plot_style()

-        title_suffix = f"Fold {fold_id} Test Set"
-        residuals_np = y_true_np - y_pred_np
+            # --- Plotting for the FIRST horizon ---
+            first_horizon = forecast_horizons[0]
+            y_true_h1 = y_true_inv_np[:, 0] # Data for the first horizon
+            y_pred_h1 = y_pred_inv_np[:, 0] # Data for the first horizon
+            residuals_h1 = y_true_h1 - y_pred_h1

-        # Determine x-axis: use provided time_index if available, else integer indices
-        # Note: Flattened y_true/y_pred have length n_samples * horizon
-        # Need an appropriate index for this flattened view if time_index is provided.
-        # Simple approach: use integer indices for flattened data.
-        plot_indices = np.arange(len(y_true_np))
-        xlabel = "Time Index (Flattened Horizon x Samples)"
-        # If time_index corresponding to the start of each forecast is passed,
-        # more sophisticated x-axis handling could be done, but integer indices are simpler.
+            # Calculate the actual time index for the first horizon's targets
+            # Requires the original dataset's frequency if available, otherwise assumes simple offset
+            target_time_index_h1 = prediction_time_index
+            try:
+                # Assuming prediction_time_index corresponds to the *time* of prediction
+                # The target for H+h occurs `h` steps later.
+                # This requires a DatetimeIndex with a frequency.
+                if isinstance(prediction_time_index, pd.DatetimeIndex) and prediction_time_index.freq:
+                    time_offset = pd.Timedelta(first_horizon, unit=prediction_time_index.freq.name)
+                    target_time_index_h1 = prediction_time_index + time_offset
+                    xlabel_h1 = f"Time (Target H+{first_horizon})"
+                else:
+                    logger.warning(f"Prediction time index lacks frequency info. Using original prediction time for H+{first_horizon} plot x-axis.")
+                    xlabel_h1 = f"Prediction Time (Plotting H+{first_horizon})"
+            except Exception as time_err:
+                logger.warning(f"Could not calculate target time index for H+{first_horizon}: {time_err}. Using prediction time index for x-axis.")
+                xlabel_h1 = f"Prediction Time (Plotting H+{first_horizon})"

-        try:
-            # Create and save each plot using utility functions
-            fig_ts = create_time_series_plot(
-                plot_indices, y_true_np, y_pred_np,
-                f"Predictions vs Actual - {title_suffix}",
-                xlabel=xlabel,
-                ylabel="Value (Original Scale)",
-                max_points=eval_config.plot_sample_size
-            )
-            save_plot(fig_ts, fold_plot_dir / "predictions_vs_actual.png")
+            title_suffix = f"- {eval_context_str} (H+{first_horizon})"

-            fig_scatter = create_scatter_plot(
-                y_true_np, y_pred_np,
-                f"Scatter Plot - {title_suffix}",
-                xlabel="Actual Values (Original Scale)",
-                ylabel="Predicted Values (Original Scale)"
-            )
-            save_plot(fig_scatter, fold_plot_dir / "scatter_predictions.png")
+            try:
+                fig_ts = create_time_series_plot(
+                    target_time_index_h1, y_true_h1, y_pred_h1, # Use H1 data and time
+                    f"Predictions vs Actual {title_suffix}",
+                    xlabel=xlabel_h1, ylabel="Value (Original Scale)",
+                    max_points=eval_config.plot_sample_size
+                )
+                save_plot(fig_ts, fold_plot_dir / f"predictions_vs_actual_h{first_horizon}.png")

-            fig_res_time = create_residuals_plot(
-                plot_indices, residuals_np,
-                f"Residuals Over Time - {title_suffix}",
-                xlabel=xlabel,
-                ylabel="Residual (Original Scale)",
-                max_points=eval_config.plot_sample_size
-            )
-            save_plot(fig_res_time, fold_plot_dir / "residuals_time.png")
+                fig_scatter = create_scatter_plot(
+                    y_true_h1, y_pred_h1, # Use H1 data
+                    f"Scatter Plot {title_suffix}",
+                    xlabel="Actual Values (Original Scale)", ylabel="Predicted Values (Original Scale)"
+                )
+                save_plot(fig_scatter, fold_plot_dir / f"scatter_predictions_h{first_horizon}.png")

-            fig_res_dist = create_residuals_distribution_plot(
-                residuals_np,
-                f"Residuals Distribution - {title_suffix}",
-                xlabel="Residual Value (Original Scale)",
-                ylabel="Density"
-            )
-            save_plot(fig_res_dist, fold_plot_dir / "residuals_distribution.png")
+                fig_res_time = create_residuals_plot(
+                    target_time_index_h1, residuals_h1, # Use H1 residuals and time
+                    f"Residuals Over Time {title_suffix}",
+                    xlabel=xlabel_h1, ylabel="Residual (Original Scale)",
+                    max_points=eval_config.plot_sample_size
+                )
+                save_plot(fig_res_time, fold_plot_dir / f"residuals_time_h{first_horizon}.png")

-            logger.info(f"Evaluation plots saved to: {fold_plot_dir}")
+                # Residual distribution can use residuals from ALL horizons
+                residuals_all = y_true_inv_np.flatten() - y_pred_inv_np.flatten()
+                fig_res_dist = create_residuals_distribution_plot(
+                    residuals_all, # Use all residuals
+                    f"Residuals Distribution {eval_context_str} (All Horizons)", # Adjusted title
+                    xlabel="Residual Value (Original Scale)", ylabel="Density"
+                )
+                save_plot(fig_res_dist, fold_plot_dir / "residuals_distribution_all_horizons.png")

-        except Exception as e:
-            logger.error(f"Failed to generate or save one or more plots for Fold {fold_id}: {e}", exc_info=True)
-            # Continue without plots, metrics are already calculated.
+                logger.info(f"Evaluation plots saved to: {fold_plot_dir}")

-    elif eval_config.save_plots and len(y_true_np) == 0:
-        logger.warning(f"Skipping plot generation for Fold {fold_id} due to empty data.")
+            except Exception as e:
+                logger.error(f"Failed to generate or save one or more plots for {eval_context_str}: {e}", exc_info=True)

+    elif eval_config.save_plots and np.sum(valid_mask) == 0:
+        logger.warning(f"Skipping plot generation for {eval_context_str} due to no valid data points.")

-    logger.info(f"Evaluation processing finished for Fold {fold_id}.")
+    logger.info(f"Evaluation processing finished for {eval_context_str}.")
     return metrics

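The H+h time shift used for the plot x-axis, sketched with an hourly index (a variant using the index's own freq offset, which also covers non-hour frequencies):

```python
import pandas as pd

# Hypothetical hourly prediction times; date_range attaches a freq.
prediction_time_index = pd.date_range("2024-01-01 00:00", periods=4, freq="h")
first_horizon = 6

if isinstance(prediction_time_index, pd.DatetimeIndex) and prediction_time_index.freq:
    # The H+6 target lives 6 index steps after each prediction time.
    target_time_index_h1 = prediction_time_index + first_horizon * prediction_time_index.freq
    print(target_time_index_h1[0])  # 2024-01-01 06:00:00
```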
@@ -257,63 +279,90 @@ def evaluate_fold_predictions(
 # This function still calls evaluate_fold_predictions internally, so it benefits
 # from the updated plotting logic without needing direct changes here.
 def evaluate_model_on_fold_test_set(
-    model: torch.nn.Module,
+    model: LSTMForecastLightningModule, # Use the specific type
     test_loader: DataLoader,
     device: torch.device,
     target_scaler: Union[StandardScaler, MinMaxScaler, None],
     eval_config: EvaluationConfig,
     fold_num: int,
-    output_dir: str
+    output_dir: str,
+    # time_index: Optional[Union[np.ndarray, pd.Index]] = None, # OLD
+    prediction_time_index: Optional[pd.Index] = None, # Pass prediction time index
+    forecast_horizons: Optional[List[int]] = None # Pass horizons
 ) -> Dict[str, float]:
     """
     [Optional Function] Evaluates a given model on a fold's test set.

     Runs the inference loop, collects scaled results, then processes them using
     `evaluate_fold_predictions` (which now uses plotting utilities).
-    Useful for standalone testing or if not using pl.Trainer.test().
+    Handles multiple forecast horizons.
     """
     # ... (Implementation of inference loop remains the same) ...
     logger.info(f"Starting full evaluation (inference + processing) for Fold {fold_num + 1}...")
     model.eval()
     model.to(device)

     all_preds_scaled_list: List[torch.Tensor] = []
     all_targets_scaled_list: List[torch.Tensor] = []

     with torch.no_grad():
-        for i, (X_batch, y_batch) in enumerate(test_loader):
+        for i, batch in enumerate(test_loader):
             try:
-                X_batch = X_batch.to(device)
-                outputs = model(X_batch) # Scaled outputs
+                if isinstance(batch, (list, tuple)) and len(batch) == 2:
+                    X_batch, y_batch = batch # y_batch shape: (batch, len(horizons))
+                    targets_present = True
+                else:
+                    X_batch = batch
+                    y_batch = None
+                    targets_present = False

-                # Ensure outputs match target shape (e.g., handle trailing dimension)
-                if outputs.shape != y_batch.shape:
-                    if outputs.ndim == y_batch.ndim + 1 and outputs.shape[-1] == 1:
-                        outputs = outputs.squeeze(-1)
-                    if outputs.shape != y_batch.shape:
-                        raise ValueError(f"Shape mismatch: Output {outputs.shape}, Target {y_batch.shape}")
+                X_batch = X_batch.to(device)
+                outputs = model(X_batch) # Scaled outputs: (batch, len(horizons))

                 all_preds_scaled_list.append(outputs.cpu())
-                all_targets_scaled_list.append(y_batch.cpu()) # Keep targets on CPU

+                if targets_present and y_batch is not None:
+                    if outputs.shape != y_batch.shape:
+                        raise ValueError(f"Shape mismatch: Output {outputs.shape}, Target {y_batch.shape}")
+                    all_targets_scaled_list.append(y_batch.cpu())
+                # ... error/warning if targets expected but not found ...

             except Exception as e:
                 logger.error(f"Error during inference batch {i} for Fold {fold_num+1}: {e}", exc_info=True)
                 raise ValueError(f"Inference failed on batch {i} for Fold {fold_num+1}")


-    # Concatenate results from all batches
+    # --- Concatenate results ---
     try:
-        if not all_preds_scaled_list or not all_targets_scaled_list:
-            logger.error(f"No prediction results collected for Fold {fold_num + 1}. Check test_loader.")
+        if not all_preds_scaled_list:
+            # ... handle no predictions ...
             return {'MAE': np.nan, 'RMSE': np.nan}
+        # Resulting shapes: (n_samples, len(horizons))
+        y_pred_scaled = torch.cat(all_preds_scaled_list, dim=0).numpy()

+        y_true_scaled = None
+        if all_targets_scaled_list:
+            y_true_scaled = torch.cat(all_targets_scaled_list, dim=0).numpy()
+        elif targets_present:
+            # ... handle missing targets ...
+            return {'MAE': np.nan, 'RMSE': np.nan}
+        else:
+            # ... handle no targets available ...
+            return {'MAE': np.nan, 'RMSE': np.nan}

-        y_pred_scaled = torch.cat(all_preds_scaled_list, dim=0).numpy()
-        y_true_scaled = torch.cat(all_targets_scaled_list, dim=0).numpy()
     except Exception as e:
         logger.error(f"Error concatenating prediction results for Fold {fold_num + 1}: {e}", exc_info=True)
+        # ... error handling ...
         raise ValueError("Failed to combine batch results during evaluation inference.")

-    # Process the collected predictions using the refactored function
-    # No time_index passed here by default, plotting will use integer indices
+    if y_true_scaled is None:
+        # ... handle missing targets ...
+        return {'MAE': np.nan, 'RMSE': np.nan}
+
+    # Ensure forecast_horizons are passed if available from the model
+    # Retrieve from model's hparams if not passed explicitly
+    if forecast_horizons is None:
+        try:
+            # Assuming forecast_horizon list is stored in model_config hparam
+            forecast_horizons = model.hparams.model_config.forecast_horizon
+        except AttributeError:
+            logger.warning("Could not retrieve forecast_horizons from model hparams for evaluation.")

+    # Process the collected predictions
     return evaluate_fold_predictions(
         y_true_scaled=y_true_scaled,
         y_pred_scaled=y_pred_scaled,
@@ -321,5 +370,8 @@ def evaluate_model_on_fold_test_set(
         eval_config=eval_config,
         fold_num=fold_num,
         output_dir=output_dir,
-        time_index=None # Explicitly pass None
+        # time_index=time_index # OLD
+        prediction_time_index=prediction_time_index, # Pass through
+        forecast_horizons=forecast_horizons, # Pass through
+        plot_title_prefix=f"Test Fold {fold_num + 1}" # Example prefix
     )
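The concatenation step in isolation, with dummy batch outputs (assumed batch size 32 and 3 horizons):

```python
import torch

# Per-batch scaled predictions, each (batch, n_horizons); last batch is smaller.
all_preds_scaled_list = [torch.randn(32, 3), torch.randn(32, 3), torch.randn(16, 3)]
y_pred_scaled = torch.cat(all_preds_scaled_list, dim=0).numpy()
print(y_pred_scaled.shape)  # (80, 3): one row per test sample, one column per horizon
```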
@@ -1,11 +1,15 @@
 import matplotlib.pyplot as plt
 import seaborn as sns
 import numpy as np
-from typing import Optional, Union
+from typing import Optional, Union, List
 import logging
+import pandas as pd

 from pathlib import Path

+# Assuming sklearn scalers are available
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+
 logger = logging.getLogger(__name__)

 def setup_plot_style(use_seaborn: bool = True) -> None:
@@ -17,14 +21,16 @@ def setup_plot_style(use_seaborn: bool = True) -> None:
     """
     if use_seaborn:
         try:
-            sns.set_theme(style="whitegrid", palette="muted")
-            plt.rcParams['figure.figsize'] = (12, 6) # Default figure size
+            # Use a different style that might be better for multiple lines
+            sns.set_theme(style="whitegrid", palette="viridis") # Changed palette
+            plt.rcParams['figure.figsize'] = (15, 7) # Slightly larger default figure size
             logger.debug("Seaborn plot style set.")
         except Exception as e:
             logger.warning(f"Failed to set seaborn theme: {e}. Using default matplotlib style.")
     else:
+        # Optional: Define a default matplotlib style if seaborn is not used
+        plt.style.use('default')
+        plt.rcParams['figure.figsize'] = (15, 7)
+        logger.debug("Using default matplotlib plot style.")

 def save_plot(fig: plt.Figure, filename: Union[str, Path]) -> None:
@@ -49,16 +55,21 @@ def save_plot(fig: plt.Figure, filename: Union[str, Path]) -> None:
         logger.info(f"Plot saved successfully to: {filepath}")
     except OSError as e:
         logger.error(f"Failed to create directory for plot {filepath}: {e}", exc_info=True)
-        raise # Re-raise OSError for directory creation issues
+        # Don't re-raise immediately, try closing figure first
+        # raise # Re-raise OSError for directory creation issues - Removed to ensure finally runs
     except Exception as e:
         logger.error(f"Failed to save plot to {filepath}: {e}", exc_info=True)
-        raise # Re-raise other saving errors
+        # Don't re-raise immediately, try closing figure first
     finally:
-        # Close the figure to free up memory, regardless of saving success
-        plt.close(fig)
+        # Close the figure to free up memory, regardless of saving success or failure
+        try:
+            plt.close(fig)
+            logger.debug(f"Closed figure for plot {filepath}.")
+        except Exception as e:
+            logger.warning(f"Failed to close figure for plot {filepath}: {e}")

 def create_time_series_plot(
-    x: np.ndarray,
+    x: Union[np.ndarray, pd.Index], # Allow pd.Index for time axis
     y_true: np.ndarray,
     y_pred: np.ndarray,
     title: str,
@@ -68,9 +79,9 @@ def create_time_series_plot(
 ) -> plt.Figure:
     """
     Create a time series plot comparing actual vs predicted values.

+    NOTE: When using multi-horizon forecasts, this typically plots only ONE selected horizon.
     Args:
-        x: The array for the x-axis (e.g., time steps, indices).
+        x: The array or index for the x-axis (e.g., time steps, datetime index). Should align with y_true/y_pred.
         y_true: Ground truth values (1D array).
         y_pred: Predicted values (1D array).
         title: Title for the plot.
@@ -84,8 +95,9 @@ def create_time_series_plot(
     Raises:
         ValueError: If input array shapes are incompatible.
     """
-    if not (x.shape == y_true.shape == y_pred.shape and x.ndim == 1):
-        raise ValueError("Input arrays (x, y_true, y_pred) must be 1D and have the same shape.")
+    # Add check for pd.Index for x
+    if not isinstance(x, (np.ndarray, pd.Index)) or x.shape[0] != y_true.shape[0] or x.shape[0] != y_pred.shape[0] or y_true.ndim != 1 or y_pred.ndim != 1:
+        raise ValueError(f"Input shapes mismatch or invalid types: x({type(x)}, {x.shape if hasattr(x, 'shape') else 'N/A'}), y_true({y_true.shape}), y_pred({y_pred.shape}). Expecting 1D y arrays and matching length x.")
     if len(x) == 0:
         logger.warning("Attempting to create time series plot with empty data.")
         # Return an empty figure or raise error? Let's return empty.
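A quick usage sketch of the relaxed x-axis contract (hypothetical data; a DatetimeIndex is now accepted for x):

```python
import numpy as np
import pandas as pd

x = pd.date_range("2024-01-01", periods=100, freq="h")  # pd.Index instead of np.ndarray
y_true = np.random.rand(100)
y_pred = y_true + np.random.normal(0, 0.1, 100)

fig = create_time_series_plot(x, y_true, y_pred, "Sanity check",
                              max_points=50)
save_plot(fig, "tmp_plots/sanity_check.png")
```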
@ -304,4 +316,243 @@ def create_residuals_distribution_plot(
|
||||
ax.grid(True, axis='y', linestyle='--', alpha=0.6)
|
||||
fig.tight_layout()
|
||||
|
||||
return fig
|
||||
return fig
|
||||
|
||||
def create_multi_horizon_time_series_plot(
|
||||
y_true_scaled_all_horizons: np.ndarray, # (N, H)
|
||||
y_pred_scaled_all_horizons: np.ndarray, # (N, H)
|
||||
target_scaler: Optional[Union[StandardScaler, MinMaxScaler]],
|
||||
prediction_time_index_h1: pd.DatetimeIndex, # Time index for the first horizon predictions
|
||||
forecast_horizons: List[int],
|
||||
title: str,
|
||||
xlabel: str = "Time",
|
||||
ylabel: str = "Value (Original Scale)",
|
||||
max_points: Optional[int] = 1000 # Limit points for clarity
|
||||
) -> plt.Figure:
|
||||
"""
|
||||
Create a time series plot comparing actual values to predictions for multiple horizons.
|
||||
Predictions for each horizon are plotted on their corresponding target time step.
|
||||
|
||||
Args:
|
||||
y_true_scaled_all_horizons: Ground truth values (N, H array) on scaled scale.
|
||||
y_pred_scaled_all_horizons: Predicted values (N, H array) on scaled scale.
|
||||
target_scaler: The scaler used for the target variable, needed for inverse transform.
|
||||
prediction_time_index_h1: DatetimeIndex for the first horizon (h=h1) predictions.
|
||||
Length should be N.
|
||||
forecast_horizons: List of forecast horizons (e.g., [1, 6, 12, 24]).
|
||||
title: Title for the plot.
|
||||
xlabel: Label for the x-axis.
|
||||
ylabel: Label for the y-axis.
|
||||
max_points: Maximum number of points to display (subsamples if needed).
|
||||
|
||||
Returns:
|
||||
The generated matplotlib Figure object.
|
||||
|
||||
Raises:
|
||||
ValueError: If input shapes are incompatible or horizons list is invalid.
|
||||
"""
|
||||
if y_true_scaled_all_horizons.shape != y_pred_scaled_all_horizons.shape:
|
||||
raise ValueError(f"Shapes of y_true_scaled_all_horizons {y_true_scaled_all_horizons.shape} and y_pred_scaled_all_horizons {y_pred_scaled_all_horizons.shape} must match.")
|
||||
    if not forecast_horizons:
        raise ValueError("forecast_horizons list cannot be empty.")
    if y_true_scaled_all_horizons.ndim != 2 or y_true_scaled_all_horizons.shape[1] != len(forecast_horizons):
        raise ValueError(f"y arrays must be 2D (N, H) where H is the number of horizons ({len(forecast_horizons)}). Shape is {y_true_scaled_all_horizons.shape}.")
    if len(prediction_time_index_h1) != y_true_scaled_all_horizons.shape[0]:
        raise ValueError(f"Length of prediction_time_index_h1 ({len(prediction_time_index_h1)}) must match the number of predictions ({y_true_scaled_all_horizons.shape[0]}).")
    if not isinstance(prediction_time_index_h1, pd.DatetimeIndex):
        logger.warning("prediction_time_index_h1 is not a DatetimeIndex. Time shifts may not work as expected.")

    logger.debug(f"Creating multi-horizon time series plot: {title}")
    setup_plot_style()  # Apply standard style

    fig, ax = plt.subplots(figsize=(18, 8))  # Larger figure for multi-horizon plots

    n_points = y_true_scaled_all_horizons.shape[0]
    plot_indices = np.arange(n_points)

    if max_points and n_points > max_points:
        step = max(1, n_points // max_points)
        plot_indices = plot_indices[::step]
        # Subsample the data and index
        y_true_scaled_plot = y_true_scaled_all_horizons[plot_indices]
        y_pred_scaled_plot = y_pred_scaled_all_horizons[plot_indices]
        time_index_h1_plot = prediction_time_index_h1[plot_indices]
        effective_title = f'{title} (Sampled {len(plot_indices)} points)'
    else:
        y_true_scaled_plot = y_true_scaled_all_horizons
        y_pred_scaled_plot = y_pred_scaled_all_horizons
        time_index_h1_plot = prediction_time_index_h1
        effective_title = title

    # Inverse transform the (possibly subsampled) data
    y_true_inv_plot = None
    y_pred_inv_plot = None
    if target_scaler is not None:
        try:
            # The scaler expects (N * H, 1), so reshape (N, H) to (N*H, 1)
            y_true_inv_plot_flat = target_scaler.inverse_transform(y_true_scaled_plot.reshape(-1, 1))
            y_pred_inv_plot_flat = target_scaler.inverse_transform(y_pred_scaled_plot.reshape(-1, 1))
            # Reshape back to (N, H)
            y_true_inv_plot = y_true_inv_plot_flat.reshape(y_true_scaled_plot.shape)
            y_pred_inv_plot = y_pred_inv_plot_flat.reshape(y_pred_scaled_plot.shape)
            logger.debug("Successfully inverse-transformed data for multi-horizon plot.")
        except Exception as e:
            logger.error(f"Failed to inverse transform data for multi-horizon plot: {e}", exc_info=True)
            # Fall back to plotting scaled data if the inverse transform fails
            y_true_inv_plot = y_true_scaled_plot
            y_pred_inv_plot = y_pred_scaled_plot
            ylabel = f"{ylabel} (Scaled Data - Inverse Transform Failed)"
    else:
        # No scaler available: plot the scaled values directly
        y_true_inv_plot = y_true_scaled_plot
        y_pred_inv_plot = y_pred_scaled_plot
        ylabel = f"{ylabel} (Scaled Data)"

    if y_true_inv_plot is None or y_pred_inv_plot is None:
        # Should not happen given the fallbacks above, but guard anyway
        logger.error("Inverse transformed data is None, cannot plot.")
        return fig  # Return the empty figure

    # Plot actuals (using h1's time index as the reference point)
    ax.plot(time_index_h1_plot, y_true_inv_plot[:, 0], label='Actuals', marker='.', linestyle='-', markersize=4, linewidth=1.5, color='black')  # Actuals for H1

    # Plot predictions for each horizon
    colors = sns.color_palette("viridis", len(forecast_horizons))  # Distinct color per horizon
    linestyles = ['-', '--', '-.', ':'] * (len(forecast_horizons) // 4 + 1)  # Cycle through linestyles

    for i, horizon in enumerate(forecast_horizons):
        preds_h = y_pred_inv_plot[:, i]
        # Calculate the time index for this specific horizon by shifting the h1 index.
        # Assumes the index frequency is appropriate for the horizon steps ('h' = hours).
        try:
            time_index_h = time_index_h1_plot + pd.to_timedelta(horizon - forecast_horizons[0], unit='h')
            ax.plot(time_index_h, preds_h, label=f'Predicted (h={horizon})', marker='x', linestyle=linestyles[i], markersize=4, alpha=0.8, linewidth=1, color=colors[i])
        except Exception as e:
            logger.warning(f"Could not calculate time index for horizon {horizon}: {e}. Skipping plot for this horizon.", exc_info=True)

    # Configure plot appearance
    ax.set_title(effective_title, fontsize=16)
    ax.set_xlabel(xlabel, fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.legend(fontsize=10)
    ax.grid(True, linestyle='--', alpha=0.6)

    # Improve x-axis readability for datetimes
    fig.autofmt_xdate()  # Auto-rotate date labels

    fig.tight_layout()

    return fig
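
# --- Illustrative standalone sketch (not part of this module; values are hypothetical) ---
# The per-horizon x-axis above is simply the h1 index shifted by (h - first_horizon) hours:
import pandas as pd

idx_h1 = pd.date_range("2024-01-01 00:00", periods=3, freq="h")
horizons = [1, 6, 12]
for h in horizons:
    idx_h = idx_h1 + pd.to_timedelta(h - horizons[0], unit="h")
    print(h, idx_h[0])  # 1 -> 00:00 (no shift), 6 -> 05:00, 12 -> 11:00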


def plot_loss_curve_from_csv(
    metrics_csv_path: Union[str, Path],
    output_path: Union[str, Path],
    title: str = "Training Loss Curve",
    train_loss_col: str = "train_loss",  # Matches the metric name logged in model.py
    val_loss_col: str = "val_loss",      # Common validation loss metric logged by PL
    epoch_col: str = "epoch"
) -> None:
    """
    Reads training metrics from a PyTorch Lightning CSVLogger file and plots
    training and validation loss curves over epochs.

    Args:
        metrics_csv_path: Path to the metrics.csv file generated by CSVLogger.
        output_path: Path where the plot image will be saved.
        title: Title for the plot.
        train_loss_col: Name of the column containing epoch-level training loss.
        val_loss_col: Name of the column containing epoch-level validation loss.
        epoch_col: Name of the column containing the epoch number.

    Raises:
        FileNotFoundError: If the metrics_csv_path does not exist.
        KeyError: If required columns are not found in the CSV.
        Exception: For other plotting or file reading errors.
    """
    logger.info(f"Generating loss curve plot from: {metrics_csv_path}")
    metrics_path = Path(metrics_csv_path)
    if not metrics_path.is_file():
        raise FileNotFoundError(f"Metrics CSV file not found at: {metrics_path}")

    try:
        metrics_df = pd.read_csv(metrics_path)

        # Check that required columns exist
        required_cols = [epoch_col, train_loss_col]
        # The validation column may be the scaled loss or the original-scale MAE
        possible_val_cols = [val_loss_col, 'val_MeanAbsoluteError_Original_Scale', 'val_mae_orig_scale']

        found_val_col = None
        for col in possible_val_cols:
            if col in metrics_df.columns:
                found_val_col = col
                break

        missing_cols = [col for col in required_cols if col not in metrics_df.columns]
        if missing_cols or not found_val_col:
            raise KeyError(f"Missing required columns in {metrics_path}: {missing_cols} or a suitable validation loss/metric column from {possible_val_cols}.")

        # --- Plotting ---
        setup_plot_style()  # Apply standard style
        fig, ax1 = plt.subplots(figsize=(12, 6))

        color1 = 'tab:red'
        ax1.set_xlabel(epoch_col.capitalize())
        # Derive the y-label from the actual train-loss column name
        ax1.set_ylabel(train_loss_col.replace('_epoch', '').replace('_', ' ').capitalize(), color=color1)
        # Drop NaNs specific to this column for plotting integrity
        train_plot_data = metrics_df[[epoch_col, train_loss_col]].dropna(subset=[train_loss_col])
        # Filter on the epoch column only if needed (usually not for loss plots)
        # train_plot_data = train_plot_data[train_plot_data[epoch_col].notna()]

        # Ensure the epoch axis starts from 0 or 1 consistently
        if train_plot_data[epoch_col].min() > 0 and 0 in metrics_df[epoch_col].unique():
            # Epochs in the plot data start at 1 but epoch 0 exists elsewhere; shift for alignment
            ax1.plot(train_plot_data[epoch_col] + 1, train_plot_data[train_loss_col], color=color1, label='Train Loss', marker='.', linestyle='-')
            logger.debug("Adjusting train loss x-axis by +1 for epoch alignment.")
        else:
            ax1.plot(train_plot_data[epoch_col], train_plot_data[train_loss_col], color=color1, label='Train Loss', marker='.', linestyle='-')

        ax1.tick_params(axis='y', labelcolor=color1)
        ax1.grid(True, axis='y', linestyle='--', alpha=0.6, which='major')

        # Validation loss/metric plotting on a twin axis
        ax2 = ax1.twinx()
        color2 = 'tab:blue'
        # Derive the y-label from the actual validation column name
        ax2.set_ylabel(found_val_col.replace('_epoch', '').replace('_', ' ').capitalize(), color=color2)
        # Drop NaNs specific to the found validation column
        val_plot_data = metrics_df[[epoch_col, found_val_col]].dropna(subset=[found_val_col])
        # val_plot_data = val_plot_data[val_plot_data[epoch_col].notna()]  # Ensure epoch is not NaN

        # Ensure the epoch axis starts from 0 or 1 consistently
        if val_plot_data[epoch_col].min() > 0 and 0 in metrics_df[epoch_col].unique():
            # Epochs in the plot data start at 1 but epoch 0 exists elsewhere; shift for alignment
            ax2.plot(val_plot_data[epoch_col] + 1, val_plot_data[found_val_col], color=color2, label='Validation Metric', marker='x', linestyle='--')
            logger.debug("Adjusting val metric x-axis by +1 for epoch alignment.")
        else:
            ax2.plot(val_plot_data[epoch_col], val_plot_data[found_val_col], color=color2, label='Validation Metric', marker='x', linestyle='--')

        ax2.tick_params(axis='y', labelcolor=color2)

        # Combine legend entries from both axes manually
        lines, labels = ax1.get_legend_handles_labels()
        lines2, labels2 = ax2.get_legend_handles_labels()
        ax2.legend(lines + lines2, labels + labels2, loc='upper right')

        plt.title(title, fontsize=14)
        fig.tight_layout()  # Otherwise the right y-label is slightly clipped

        # Save the plot
        save_plot(fig, output_path)

    except pd.errors.EmptyDataError:
        logger.error(f"Metrics CSV file is empty: {metrics_csv_path}")
    except KeyError as e:
        logger.error(f"Could not find expected column in {metrics_csv_path}: {e}")
        raise  # Re-raise the specific error after logging
    except Exception as e:
        logger.error(f"Failed to create or save loss curve plot from {metrics_csv_path}: {e}", exc_info=True)
        raise  # Re-raise general errors
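
# --- Illustrative usage sketch (paths are hypothetical) ---
# CSVLogger typically writes to <save_dir>/<name>/version_<n>/metrics.csv, so a call
# against such a run could look like:
#
#   plot_loss_curve_from_csv(
#       metrics_csv_path="output/classic_run/training_logs/version_0/metrics.csv",
#       output_path="output/classic_run/loss_curve.png",
#       title="Training Loss Curve",
#   )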
20
forecasting_model/train/__init__.py
Normal file
@ -0,0 +1,20 @@
"""
Training subpackage: classic training runs and ensemble evaluation for the forecasting model.
"""

__version__ = "0.1.0"

# Expose core components for easier import
from .ensemble_evaluation import (
    run_ensemble_evaluation
)

# Expose the main configuration class from utils
from ..utils import MainConfig

# Define __all__ for an explicit public API (optional but good practice)
__all__ = [
    "run_ensemble_evaluation",
    "MainConfig",
]
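
# With these re-exports, callers can write (a sketch, assuming the package is importable):
#
#   from forecasting_model.train import run_ensemble_evaluation, MainConfig
#
# instead of importing from the individual submodules.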
276
forecasting_model/train/classic.py
Normal file
@ -0,0 +1,276 @@
"""
Classic training routine: train on the initial data segment, validate and test on the final segments.
"""

import logging
import time
from pathlib import Path
import pandas as pd
import torch
import yaml
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint, LearningRateMonitor
from pytorch_lightning.loggers import CSVLogger
from typing import Dict, Optional

from forecasting_model.utils.forecast_config_model import MainConfig
from forecasting_model.data_processing import prepare_fold_data_and_loaders, split_data_classic
from forecasting_model.train.model import LSTMForecastLightningModule
from forecasting_model.evaluation import evaluate_fold_predictions

from forecasting_model.utils.helper import save_results
from forecasting_model.io.plotting import plot_loss_curve_from_csv

logger = logging.getLogger(__name__)


def run_classic_training(
    config: MainConfig,
    full_df: pd.DataFrame,
    output_base_dir: Path
) -> Optional[Dict[str, float]]:
    """
    Runs a single training pipeline using a classic train/val/test split.

    Args:
        config: The main configuration object.
        full_df: The complete raw DataFrame.
        output_base_dir: The base directory where general outputs are saved.
            Classic results will be saved in a subdirectory.

    Returns:
        A dictionary containing test metrics (e.g., {'MAE': ..., 'RMSE': ...})
        for the classic run, or None if it fails.
    """
    run_start_time = time.perf_counter()
    logger.info("--- Starting Classic Training Run ---")

    # Define a specific output directory for this run
    classic_output_dir = output_base_dir / "classic_run"
    classic_output_dir.mkdir(parents=True, exist_ok=True)
    logger.info(f"Classic run outputs will be saved to: {classic_output_dir}")

    test_metrics: Optional[Dict[str, float]] = None
    best_val_score: Optional[float] = None
    best_model_path: Optional[str] = None

    try:
        # --- Data Splitting ---
        logger.info("Splitting data into classic train/val/test sets...")
        n_samples = len(full_df)
        val_frac = config.cross_validation.val_size_fraction
        test_frac = config.cross_validation.test_size_fraction
        train_idx, val_idx, test_idx = split_data_classic(n_samples, val_frac, test_frac)

        # Store the test datetime index for evaluation plotting
        test_datetime_index = full_df.iloc[test_idx].index

        # --- Data Preparation ---
        logger.info("Preparing data loaders for the classic split...")
        train_loader, val_loader, test_loader, target_scaler, input_size = prepare_fold_data_and_loaders(
            full_df=full_df,
            train_idx=train_idx,
            val_idx=val_idx,
            test_idx=test_idx,
            target_col=config.data.target_col,
            feature_config=config.features,
            train_config=config.training,
            eval_config=config.evaluation
        )
        logger.info(f"Data loaders prepared. Input size determined: {input_size}")

        # Save artifacts specific to this run if needed (e.g., for later inference)
        torch.save(test_loader, classic_output_dir / "classic_test_loader.pt")
        torch.save(target_scaler, classic_output_dir / "classic_target_scaler.pt")
        torch.save(input_size, classic_output_dir / "classic_input_size.pt")
        # Save the config for this run
        try:
            config_dump = config.model_dump()  # Pydantic v2
        except AttributeError:
            config_dump = config.dict()  # Fallback for Pydantic v1
        with open(classic_output_dir / "config.yaml", 'w') as f:
            yaml.dump(config_dump, f, default_flow_style=False)

        # --- Model Initialization ---
        model = LSTMForecastLightningModule(
            model_config=config.model,
            train_config=config.training,
            input_size=input_size,
            target_scaler=target_scaler
        )
        logger.info("Classic LSTMForecastLightningModule initialized.")

        # --- PyTorch Lightning Callbacks ---
        monitor_metric = "val_MeanAbsoluteError"  # Monitor the same metric as the CV folds
        monitor_mode = "min"

        early_stop_callback = None
        if config.training.early_stopping_patience is not None and config.training.early_stopping_patience > 0:
            early_stop_callback = EarlyStopping(
                monitor=monitor_metric, min_delta=0.0001,
                patience=config.training.early_stopping_patience, verbose=True, mode=monitor_mode
            )
            logger.info(f"Enabled EarlyStopping: monitor='{monitor_metric}', patience={config.training.early_stopping_patience}")

        checkpoint_callback = ModelCheckpoint(
            dirpath=classic_output_dir / "checkpoints",
            filename="best_classic_model",  # Simple filename
            save_top_k=1, monitor=monitor_metric, mode=monitor_mode, verbose=True
        )
        logger.info(f"Enabled ModelCheckpoint: monitor='{monitor_metric}', mode='{monitor_mode}'")

        lr_monitor = LearningRateMonitor(logging_interval='epoch')
        callbacks = [checkpoint_callback, lr_monitor]
        if early_stop_callback:
            callbacks.append(early_stop_callback)

        # --- PyTorch Lightning Logger ---
        pl_logger = CSVLogger(save_dir=str(classic_output_dir), name="training_logs")
        logger.info(f"Using CSVLogger, logs will be saved in: {pl_logger.log_dir}")

        # --- PyTorch Lightning Trainer ---
        accelerator = 'gpu' if torch.cuda.is_available() else 'cpu'
        devices = 1 if accelerator == 'gpu' else None
        precision = getattr(config.training, 'precision', 32)

        trainer = pl.Trainer(
            accelerator=accelerator, devices=devices,
            max_epochs=config.training.epochs,
            callbacks=callbacks, logger=pl_logger,
            log_every_n_steps=max(1, len(train_loader) // 10),
            enable_progress_bar=True,
            gradient_clip_val=getattr(config.training, 'gradient_clip_val', None),
            precision=precision,
        )
        logger.info(f"Initialized PyTorch Lightning Trainer: accelerator='{accelerator}', devices={devices}, precision={precision}")

        # --- Training ---
        logger.info("Starting classic model training...")
        trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)
        logger.info("Classic model training finished.")

        # Store the best validation score and checkpoint path
        best_val_score_tensor = trainer.checkpoint_callback.best_model_score
        best_model_path = trainer.checkpoint_callback.best_model_path
        best_val_score = best_val_score_tensor.item() if best_val_score_tensor is not None else None

        if best_val_score is not None:
            logger.info(f"Best validation score ({monitor_metric}): {best_val_score:.4f}")
            logger.info(f"Best model checkpoint path: {best_model_path}")
        else:
            logger.warning(f"Could not retrieve best validation score/path (metric: {monitor_metric}). Evaluation might use the last model.")
            best_model_path = None

        # --- Prediction on Test Set ---
        logger.info("Starting prediction on the classic test set using the best checkpoint...")
        prediction_results_list = trainer.predict(
            ckpt_path=best_model_path if best_model_path else 'last',
            dataloaders=test_loader
        )

        # --- Evaluation ---
        if not prediction_results_list:
            logger.error("Predict phase did not return any results for the classic run.")
            test_metrics = None
        else:
            try:
                # Shapes: (n_samples, len(horizons))
                all_preds_scaled = torch.cat([b['preds_scaled'] for b in prediction_results_list], dim=0).numpy()
                n_predictions = len(all_preds_scaled)  # Number of samples actually predicted

                if 'targets_scaled' in prediction_results_list[0]:
                    all_targets_scaled = torch.cat([b['targets_scaled'] for b in prediction_results_list], dim=0).numpy()
                    if len(all_targets_scaled) != n_predictions:
                        logger.error(f"Classic Run: Mismatch between number of predictions ({n_predictions}) and targets ({len(all_targets_scaled)}).")
                        raise ValueError("Prediction and target count mismatch during classic evaluation.")
                else:
                    raise ValueError("Targets missing from prediction results.")

                logger.info(f"Processing {n_predictions} prediction results for the classic test set...")

                # --- Calculate the Correct Time Index for Plotting (First Horizon) ---
                target_time_index_for_plotting = None
                if test_idx is not None and config.features.forecast_horizon:
                    try:
                        test_block_index = full_df.index[test_idx]  # Use the test_idx from the classic split
                        seq_len = config.features.sequence_length
                        first_horizon = config.features.forecast_horizon[0]
                        start_offset = seq_len + first_horizon - 1
                        if start_offset < len(test_block_index):
                            end_index = min(start_offset + n_predictions, len(test_block_index))
                            target_time_index_for_plotting = test_block_index[start_offset:end_index]
                            if len(target_time_index_for_plotting) != n_predictions:
                                logger.warning(f"Classic Run: Calculated target time index length ({len(target_time_index_for_plotting)}) "
                                               f"does not match prediction count ({n_predictions}). Plotting x-axis might be misaligned.")
                                target_time_index_for_plotting = None
                        else:
                            logger.warning(f"Classic Run: Cannot calculate target time index, start offset ({start_offset}) "
                                           f"exceeds test block length ({len(test_block_index)}).")
                    except Exception as e:
                        logger.error(f"Classic Run: Error calculating target time index for plotting: {e}", exc_info=True)
                        target_time_index_for_plotting = None  # Ensure it's None if an error occurs
                else:
                    logger.warning("Classic Run: Skipping target time index calculation (missing test_idx or forecast_horizon).")
                # --- End Index Calculation ---

                # Use the classic-run-specific objects and config
                test_metrics = evaluate_fold_predictions(
                    y_true_scaled=all_targets_scaled,
                    y_pred_scaled=all_preds_scaled,
                    target_scaler=target_scaler,
                    eval_config=config.evaluation,
                    fold_num=-1,  # Indicates the classic run
                    output_dir=str(classic_output_dir),
                    plot_subdir="plots",
                    prediction_time_index=target_time_index_for_plotting,  # Pass the correctly calculated index
                    forecast_horizons=config.features.forecast_horizon,
                    plot_title_prefix="Classic Run"
                )
                # Save metrics
                save_results({"overall_metrics": test_metrics}, classic_output_dir / "test_metrics.json")
                logger.info(f"Classic run test metrics (overall): {test_metrics}")

                # --- Plot Loss Curve for the Classic Run ---
                try:
                    # Look for metrics.csv inside potential version_*/ directories
                    classic_log_dir = classic_output_dir / "training_logs"
                    metrics_file = None
                    version_dirs = list(classic_log_dir.glob("version_*"))
                    if version_dirs:
                        # Assume the latest version directory contains the relevant logs
                        latest_version_dir = max(version_dirs, key=lambda p: p.stat().st_mtime)
                        potential_metrics_file = latest_version_dir / "metrics.csv"
                        if potential_metrics_file.is_file():
                            metrics_file = potential_metrics_file
                        else:
                            logger.warning(f"Classic Run: metrics.csv not found in latest version directory: {latest_version_dir}")
                    else:
                        # Fallback if no version_* directories exist (less common with CSVLogger)
                        potential_metrics_file = classic_log_dir / "metrics.csv"
                        if potential_metrics_file.is_file():
                            metrics_file = potential_metrics_file

                    if metrics_file and metrics_file.is_file():
                        logger.info(f"Generating loss curve for the classic run from: {metrics_file}")
                        plot_loss_curve_from_csv(
                            metrics_csv_path=metrics_file,
                            output_path=classic_output_dir / "loss_curve.png",
                            title="Classic Run Training Progression",
                            train_loss_col='train_loss',  # Changed from 'train_loss_epoch'
                            val_loss_col='val_loss'
                        )
                    else:
                        logger.warning(f"Classic Run: Could not find metrics.csv in {classic_log_dir} or its version subdirectories for the loss curve plot.")
                except Exception as plot_e:
                    logger.error(f"Classic Run: Failed to generate loss curve plot: {plot_e}", exc_info=True)
                # --- End Classic Loss Plotting ---

            except Exception as e:
                logger.error(f"Error processing classic prediction results: {e}", exc_info=True)
                test_metrics = None

    except Exception as e:
        logger.error(f"An error occurred during the classic training pipeline: {e}", exc_info=True)
        test_metrics = None  # Indicate failure

    finally:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        run_end_time = time.perf_counter()
        logger.info(f"--- Finished Classic Training Run in {run_end_time - run_start_time:.2f} seconds ---")

    return test_metrics
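
# --- Illustrative driver sketch (hypothetical; the pipeline's main script performs
# these steps, and the data-loading call shown here is only an assumption) ---
#
#   config = MainConfig(**yaml.safe_load(Path("config.yaml").read_text()))
#   full_df = pd.read_csv("data.csv", index_col=0, parse_dates=True)
#   metrics = run_classic_training(config, full_df, output_base_dir=Path("output"))
#   if metrics is not None:
#       print("Classic test metrics:", metrics)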
425
forecasting_model/train/ensemble_evaluation.py
Normal file
@ -0,0 +1,425 @@
"""
Ensemble evaluation for time series forecasting models.

This module provides functionality to evaluate ensemble predictions
by combining predictions from n-1 folds and testing on the remaining fold.
"""

import logging
import numpy as np
import torch
import yaml  # For loading the fold config
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import pandas as pd  # For time index handling
import pickle  # Needed for the specific unpickling error check

from forecasting_model.evaluation import evaluate_fold_predictions
from forecasting_model.train.model import LSTMForecastLightningModule
from forecasting_model.utils.forecast_config_model import MainConfig

logger = logging.getLogger(__name__)


def load_fold_model_and_objects(
    fold_dir: Path,
) -> Optional[Tuple[LSTMForecastLightningModule, MainConfig, torch.utils.data.DataLoader, Union[StandardScaler, MinMaxScaler, None], int, Optional[pd.Index], List[int]]]:
    """
    Load a trained model, its config, dataloader, scaler, input_size, prediction time index, and forecast horizons.

    Args:
        fold_dir: Directory containing the fold's artifacts (checkpoint, config, loader, etc.).

    Returns:
        A tuple containing (model, config, test_loader, target_scaler, input_size, prediction_target_time_index, forecast_horizons),
        or None if any essential artifact is missing or loading fails.
    """
    try:
        logger.info(f"Loading artifacts from: {fold_dir}")

        # 1. Load Fold Configuration
        config_path = fold_dir / "config.yaml"
        if not config_path.is_file():
            logger.error(f"Fold config file not found in {fold_dir}")
            return None
        with open(config_path, 'r') as f:
            fold_config_dict = yaml.safe_load(f)
        fold_config = MainConfig(**fold_config_dict)  # Validate the fold's config

        # 2. Load Saved Objects using torch.load
        test_loader_path = fold_dir / "test_loader.pt"
        scaler_path = fold_dir / "target_scaler.pt"
        input_size_path = fold_dir / "input_size.pt"
        prediction_index_path = fold_dir / "prediction_target_time_index.pt"

        if not all(p.is_file() for p in [test_loader_path, scaler_path, input_size_path]):
            logger.error(f"Missing one or more required artifacts (test_loader, target_scaler, input_size) in {fold_dir}")
            return None

        try:
            # --- Explicitly set weights_only=False for non-model objects ---
            test_loader = torch.load(test_loader_path, weights_only=False)
            target_scaler = torch.load(scaler_path, weights_only=False)
            input_size = torch.load(input_size_path, weights_only=False)
            # --- End Modification ---
        except pickle.UnpicklingError as e:
            # Catch potential unpickling errors even with weights_only=False
            logger.error(f"Failed to unpickle saved object in {fold_dir}: {e}", exc_info=True)
            return None
        except AttributeError as e:
            # Catch issues where class definitions changed between saving and loading
            logger.error(f"AttributeError loading saved object in {fold_dir} (class definition changed?): {e}", exc_info=True)
            return None
        except Exception as e:
            # Catch other potential loading errors
            logger.error(f"Unexpected error loading saved objects (loader/scaler/size) from {fold_dir}: {e}", exc_info=True)
            return None

        # Retrieve the forecast horizon list from the fold's config
        forecast_horizons = fold_config.features.forecast_horizon

        # --- Extract the prediction target time index (if available) ---
        prediction_target_time_index: Optional[pd.Index] = None
        if prediction_index_path.is_file():
            try:
                prediction_target_time_index = torch.load(prediction_index_path, weights_only=False)
                # Basic validation
                if not isinstance(prediction_target_time_index, pd.Index):
                    logger.warning(f"Loaded prediction index from {prediction_index_path} is not a pandas Index.")
                    prediction_target_time_index = None
                else:
                    logger.debug(f"Loaded prediction target time index from {prediction_index_path}")
            except Exception as e:
                logger.warning(f"Failed to load prediction target time index from {prediction_index_path}: {e}")
        else:
            logger.warning(f"Prediction target time index file not found at {prediction_index_path}. Plotting x-axis might be inaccurate for ensemble plots.")
        # --- End Index Extraction ---

        # 3. Find the Checkpoint and Load the Model
        checkpoint_path = None
        try:
            # Use a recursive glob to find the checkpoint, which may be nested deeper
            checkpoints = list(fold_dir.glob("**/best_model_fold_*.ckpt"))
            if not checkpoints:
                logger.error(f"No 'best_model_fold_*.ckpt' checkpoint found in {fold_dir} or subdirectories.")
                return None
            if len(checkpoints) > 1:
                logger.warning(f"Multiple checkpoints found in {fold_dir}, using the first one: {checkpoints[0]}")
            checkpoint_path = checkpoints[0]

            logger.info(f"Loading model from checkpoint: {checkpoint_path}")
            model = LSTMForecastLightningModule.load_from_checkpoint(
                checkpoint_path,
                map_location=torch.device('cpu'),  # Optional: load to CPU first if memory is tight
                model_config=fold_config.model,
                train_config=fold_config.training,
                input_size=input_size,
                target_scaler=target_scaler
            )
            model.eval()
            logger.info(f"Successfully loaded model and artifacts from {fold_dir}")
            return model, fold_config, test_loader, target_scaler, input_size, prediction_target_time_index, forecast_horizons

        except FileNotFoundError:
            logger.error(f"Checkpoint file not found: {checkpoint_path}")
            return None
        except Exception as e:
            logger.error(f"Failed to load model from checkpoint {checkpoint_path} in {fold_dir}: {e}", exc_info=True)
            return None

    except Exception as e:
        logger.error(f"Generic error loading artifacts from {fold_dir}: {e}", exc_info=True)
        return None


def make_ensemble_predictions(
    models: List[LSTMForecastLightningModule],
    test_loader: torch.utils.data.DataLoader,
    device: Optional[torch.device] = None
) -> Tuple[Optional[Dict[str, np.ndarray]], Optional[np.ndarray]]:
    """
    Make predictions using an ensemble of models efficiently.

    Processes the test_loader once, collecting predictions from all models per batch.

    Args:
        models: List of trained models (already in eval mode).
        test_loader: DataLoader for the test set.
        device: Device to run predictions on (e.g., torch.device("cuda:0")).
            If None, uses GPU if available, else CPU.

    Returns:
        Tuple of (ensemble_predictions, targets):
        - ensemble_predictions: Dict of ensemble predictions keyed by method
          ('mean', 'median', 'min', 'max'); values are np.ndarrays.
          None if prediction fails.
        - targets: Ground truth values as a single np.ndarray. None if prediction
          fails or targets are unavailable in the loader.
    """
    if not models:
        logger.warning("make_ensemble_predictions received an empty list of models.")
        return None, None

    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.info(f"Running ensemble predictions on device: {device}")

    # Move all models to the target device
    for model in models:
        model.to(device)

    all_batch_preds: List[List[np.ndarray]] = [[] for _ in models]  # Outer list: models, inner list: batches
    all_batch_targets: List[np.ndarray] = []
    targets_available = True

    with torch.no_grad():
        for batch_idx, batch in enumerate(test_loader):
            try:
                # Determine whether the batch contains targets
                if isinstance(batch, (list, tuple)) and len(batch) == 2:
                    x, y = batch
                    x = x.to(device)
                    # Keep targets on CPU until needed for concatenation
                    all_batch_targets.append(y.cpu().numpy())
                else:
                    x = batch.to(device)
                    targets_available = False  # No targets found in this batch

                # Get predictions from all models for this batch
                for i, model in enumerate(models):
                    try:
                        pred = model(x)  # Shape: (batch, horizon)
                        all_batch_preds[i].append(pred.cpu().numpy())
                    except Exception as model_err:
                        logger.error(f"Error during prediction with model {i} on batch {batch_idx}: {model_err}", exc_info=True)
                        # Fill with NaNs of the expected shape so the nan-aware aggregation below can skip them
                        batch_size = x.shape[0]
                        horizon = models[0].output_size  # Assumes all models share the same horizon count
                        nan_preds = np.full((batch_size, horizon), np.nan)
                        all_batch_preds[i].append(nan_preds)

            except Exception as batch_err:
                logger.error(f"Error processing batch {batch_idx} for ensemble prediction: {batch_err}", exc_info=True)
                # If a batch fails catastrophically, we cannot proceed reliably
                return None, None  # Indicate failure

    # Concatenate batch results for each model
    model_preds_concat = []
    for i in range(len(models)):
        if not all_batch_preds[i]:  # Check whether any predictions were collected for this model
            logger.warning(f"No predictions collected for model index {i}. Skipping this model in the ensemble.")
            continue  # Skip this model if it failed on all batches
        try:
            model_preds_concat.append(np.concatenate(all_batch_preds[i], axis=0))
        except ValueError as e:
            logger.error(f"Failed to concatenate predictions for model index {i}: {e}. Check for shape mismatches or empty lists.")
            # Skip the model rather than failing the whole ensemble, for robustness
            continue

    if not model_preds_concat:
        logger.error("No valid predictions collected from any model in the ensemble.")
        return None, None

    # Concatenate targets if available
    targets_concat = None
    if targets_available and all_batch_targets:
        try:
            targets_concat = np.concatenate(all_batch_targets, axis=0)
        except ValueError as e:
            logger.error(f"Failed to concatenate targets: {e}")
            return None, None  # Fail if targets were expected but could not be combined
    elif targets_available and not all_batch_targets:
        logger.warning("Targets were expected based on the first batch, but none were collected.")
        # Proceed without targets, returning None for them

    # Stack predictions from all models: shape (num_models, num_samples, horizon)
    try:
        stacked_preds = np.stack(model_preds_concat, axis=0)
    except ValueError as e:
        logger.error(f"Failed to stack model predictions: {e}. Check if all models produced compatible shapes.")
        return None, targets_concat  # Return targets if available, but no ensemble preds

    # Calculate the ensemble predictions; the nan-aware reductions ignore NaNs
    # potentially introduced by per-model failures above
    ensemble_preds = {
        'mean': np.nanmean(stacked_preds, axis=0),
        'median': np.nanmedian(stacked_preds, axis=0),
        'min': np.nanmin(stacked_preds, axis=0),
        'max': np.nanmax(stacked_preds, axis=0)
    }

    logger.info(f"Ensemble predictions generated using {stacked_preds.shape[0]} models.")
    return ensemble_preds, targets_concat
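
# --- Illustrative standalone sketch of the nan-aware aggregation above (values hypothetical) ---
import numpy as np

# (num_models=2, num_samples=1, num_horizons=2); the NaN simulates a failed model
stacked = np.array([[[1.0, 2.0]],
                    [[np.nan, 4.0]]])
print(np.nanmean(stacked, axis=0))    # [[1. 3.]] -- the NaN is ignored
print(np.nanmedian(stacked, axis=0))  # [[1. 3.]]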


def evaluate_ensemble_for_test_fold(
    test_fold_num: int,
    all_fold_dirs: List[Path],
    output_base_dir: Path,
) -> Optional[Dict[str, Dict[str, float]]]:
    """
    Evaluates ensemble predictions for a specific test fold.

    Args:
        test_fold_num: The 1-based number of the fold to use as the test set.
        all_fold_dirs: List of paths to all fold directories.
        output_base_dir: Base directory for saving evaluation results/plots.

    Returns:
        Dictionary containing metrics for each ensemble method for this test fold,
        or None if evaluation fails.
    """
    logger.info(f"--- Evaluating Ensemble: Test Fold {test_fold_num} ---")
    test_fold_dir = output_base_dir / f"fold_{test_fold_num:02d}"

    load_result = load_fold_model_and_objects(test_fold_dir)
    if load_result is None:
        logger.error(f"Failed to load necessary artifacts for test fold {test_fold_num}. Skipping ensemble evaluation for this fold.")
        return None
    # Unpack the results, including the prediction time index and horizons
    _, test_fold_config, test_loader, target_scaler, _, prediction_target_time_index, test_forecast_horizons = load_result

    # Load models from all *other* folds
    ensemble_models: List[LSTMForecastLightningModule] = []
    model_forecast_horizons = None  # Track horizons from loaded models
    for i, fold_dir in enumerate(all_fold_dirs):
        current_fold_num = i + 1
        if current_fold_num == test_fold_num:
            continue  # Skip the test fold itself

        model_load_result = load_fold_model_and_objects(fold_dir)
        if model_load_result:
            model, _, _, _, _, _, fold_horizons = model_load_result  # Only the model and horizons are needed here
            if model:
                ensemble_models.append(model)
                # Store horizons from the first successfully loaded model
                if model_forecast_horizons is None:
                    model_forecast_horizons = fold_horizons
                # Check consistency of horizons across ensemble models
                elif set(model_forecast_horizons) != set(fold_horizons):
                    logger.error(f"Inconsistent forecast horizons between ensemble models! Test fold {test_fold_num} expected {test_forecast_horizons}, "
                                 f"model {current_fold_num} has {fold_horizons}. Ensemble may be invalid.")
                    # Alternatively fail hard here; for now, proceed with caution
        else:
            logger.warning(f"Could not load model from fold {current_fold_num} to include in the ensemble for test fold {test_fold_num}.")

    if len(ensemble_models) < 2:
        logger.warning(f"Skipping ensemble evaluation for test fold {test_fold_num}: "
                       f"need at least 2 models for an ensemble, only loaded {len(ensemble_models)}.")
        return {}  # Return an empty dict (not None) to indicate the process ran but no ensemble was formed

    # Check consistency between the test fold's horizons and the ensemble models' horizons
    if model_forecast_horizons is None:  # Should not happen if len(ensemble_models) >= 1
        logger.error(f"Could not determine forecast horizons from ensemble models for test fold {test_fold_num}.")
        return None
    if set(test_forecast_horizons) != set(model_forecast_horizons):
        logger.error(f"Forecast horizons of test fold {test_fold_num} ({test_forecast_horizons}) do not match "
                     f"horizons from ensemble models ({model_forecast_horizons}). Cannot evaluate.")
        return None

    # Make ensemble predictions using the loaded models and the test fold's data loader
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    ensemble_preds_dict, targets_np = make_ensemble_predictions(ensemble_models, test_loader, device=device)

    if ensemble_preds_dict is None or targets_np is None:
        logger.error(f"Failed to generate ensemble predictions or retrieve targets for test fold {test_fold_num}.")
        return None  # Indicate failure

    # Evaluate each ensemble method's predictions against the test fold's targets
    fold_ensemble_results: Dict[str, Dict[str, float]] = {}
    for method, preds_np in ensemble_preds_dict.items():
        logger.info(f"Evaluating ensemble method '{method}' for test fold {test_fold_num}...")

        # Define a unique output directory for this method's plots
        method_plot_dir = output_base_dir / "ensemble_eval_plots" / f"test_fold_{test_fold_num:02d}" / f"method_{method}"

        # Use the prediction_target_time_index loaded earlier
        prediction_time_index_for_plot = None
        if prediction_target_time_index is not None:
            if len(prediction_target_time_index) == targets_np.shape[0]:
                prediction_time_index_for_plot = prediction_target_time_index
            else:
                logger.warning(f"Length of loaded prediction target time index ({len(prediction_target_time_index)}) does not match "
                               f"number of samples ({targets_np.shape[0]}) for test fold {test_fold_num}, method '{method}'. Plot x-axis may be incorrect.")

        # Call the standard evaluation function
        metrics = evaluate_fold_predictions(
            y_true_scaled=targets_np,
            y_pred_scaled=preds_np,
            target_scaler=target_scaler,
            eval_config=test_fold_config.evaluation,
            fold_num=test_fold_num - 1,
            output_dir=str(method_plot_dir.parent.parent),
            plot_subdir=f"method_{method}",
            prediction_time_index=prediction_time_index_for_plot,  # Pass the index
            forecast_horizons=test_forecast_horizons,
            plot_title_prefix=f"Ensemble ({method})"
        )
        fold_ensemble_results[method] = metrics

    logger.info(f"--- Finished Ensemble Evaluation: Test Fold {test_fold_num} ---")
    return fold_ensemble_results


def run_ensemble_evaluation(
    config: MainConfig,  # Main config for context; per-fold configs are loaded from disk
    output_base_dir: Path,
) -> Dict[int, Dict[str, Dict[str, float]]]:
    """
    Run ensemble evaluation across all folds, treating each as the test set once.

    Args:
        config: The main configuration object (potentially unused if fold configs suffice).
        output_base_dir: Base directory where fold outputs are stored.

    Returns:
        Dictionary containing ensemble metrics for each test fold:
        { test_fold_num: { ensemble_method: { metric_name: value, ... }, ... }, ... }
    """
    logger.info("===== Starting Cross-Validated Ensemble Evaluation =====")
    all_ensemble_results: Dict[int, Dict[str, Dict[str, float]]] = {}

    # Discover fold directories
    fold_dirs = sorted([d for d in output_base_dir.glob("fold_*") if d.is_dir()])
    if not fold_dirs:
        logger.error(f"No fold directories found in {output_base_dir} for ensemble evaluation.")
        return {}
    if len(fold_dirs) < 2:
        logger.warning(f"Need at least 2 folds for ensemble evaluation, found {len(fold_dirs)}. Skipping.")
        return {}

    logger.info(f"Found {len(fold_dirs)} fold directories.")

    # Iterate through each fold, designating it as the test fold
    for i, test_fold_dir in enumerate(fold_dirs):
        test_fold_num = i + 1  # 1-based fold number
        try:
            results_for_test_fold = evaluate_ensemble_for_test_fold(
                test_fold_num=test_fold_num,
                all_fold_dirs=fold_dirs,
                output_base_dir=output_base_dir,
            )

            if results_for_test_fold is not None:
                # Only add results if the evaluation did not fail completely
                all_ensemble_results[test_fold_num] = results_for_test_fold

        except Exception as e:
            # Catch unexpected errors during a specific test fold evaluation
            logger.error(f"Unexpected error during ensemble evaluation with test fold {test_fold_num}: {e}", exc_info=True)
            continue  # Continue to the next fold

    # Saving is handled by the main script (`forecasting_model_run.py`) that calls this
    if not all_ensemble_results:
        logger.warning("Ensemble evaluation finished, but no results were generated.")
    else:
        logger.info("===== Finished Cross-Validated Ensemble Evaluation =====")

    return all_ensemble_results
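
# --- Illustrative driver sketch (hypothetical; in this codebase the main script
# forecasting_model_run.py invokes run_ensemble_evaluation and saves the results) ---
#
#   results = run_ensemble_evaluation(config=config, output_base_dir=Path("output"))
#   for fold_num, per_method in results.items():
#       print(fold_num, list(per_method))  # e.g. 1 ['mean', 'median', 'min', 'max']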
0
forecasting_model/train/folds.py
Normal file
@ -9,7 +9,7 @@ from typing import Optional, Dict, Any, Union, List, Tuple
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Assuming config_model is in the sibling utils/ directory
from forecasting_model.utils.config_model import ModelConfig, TrainingConfig
from forecasting_model.utils.forecast_config_model import ModelConfig, TrainingConfig

logger = logging.getLogger(__name__)

@ -30,41 +30,42 @@ class LSTMForecastLightningModule(pl.LightningModule):
        super().__init__()

        # --- Validate & Store Configs ---
        # Validate the input_size passed during instantiation
        if input_size <= 0:
            raise ValueError("`input_size` must be provided as a positive integer during model instantiation.")
        self._input_size = input_size  # Use a temporary attribute

        # Store the validated input_size directly for use in layer definitions
        self._input_size = input_size  # Use a temporary attribute before hparams are saved
        # Ensure forecast_horizon is a valid list in the config
        if not hasattr(model_config, 'forecast_horizon') or \
           not isinstance(model_config.forecast_horizon, list) or \
           not model_config.forecast_horizon or \
           any(h <= 0 for h in model_config.forecast_horizon):
            raise ValueError("ModelConfig requires `forecast_horizon` to be a non-empty list of positive integers.")

        # Ensure forecast_horizon is set in the config for the output layer
        if not hasattr(model_config, 'forecast_horizon') or model_config.forecast_horizon is None or model_config.forecast_horizon <= 0:
            raise ValueError("ModelConfig requires `forecast_horizon` to be set and positive.")
        self.output_size = model_config.forecast_horizon
        # Output size is the number of horizons we predict
        self.output_size = len(model_config.forecast_horizon)
        # Store the actual horizon list for reference, ensuring it is sorted
        self.forecast_horizons = sorted(model_config.forecast_horizon)

        # Store configurations - the input_size argument is saved via save_hyperparameters
        self.model_config = model_config
        self.train_config = train_config
        self.target_scaler = target_scaler  # Store the scaler for this fold

        # Use save_hyperparameters() to automatically log configs and allow loading
        # Pass input_size explicitly so it is saved in hparams
        # Exclude the scaler as it is stateful and fold-specific
        # forecast_horizon is part of model_config, which is saved
        self.save_hyperparameters('model_config', 'train_config', 'input_size', ignore=['target_scaler'])
        # Note: Pydantic models might not be perfectly saved/loaded by PL's hparams; check if needed.
        # If issues arise when loading, the relevant hparams may need to be flattened manually.

        # --- Define Model Layers ---
        # Access input_size via hparams now
        self.lstm = nn.LSTM(
            input_size=self.hparams.input_size,
            hidden_size=self.hparams.model_config.hidden_size,
            num_layers=self.hparams.model_config.num_layers,
            batch_first=True,  # Input shape: (batch, seq_len, features)
            batch_first=True,
            dropout=self.hparams.model_config.dropout if self.hparams.model_config.num_layers > 1 else 0.0
        )
        self.dropout = nn.Dropout(self.hparams.model_config.dropout)

        # Output layer maps the LSTM hidden state to the forecast horizon
        # We typically take the output of the last time step
        # Output layer maps the LSTM hidden state to the number of forecast horizons
        self.fc = nn.Linear(self.hparams.model_config.hidden_size, self.output_size)

        # Optional residual connection handling
@ -96,7 +97,7 @@ class LSTMForecastLightningModule(pl.LightningModule):
        self.val_metrics = metrics.clone(prefix='val_')
        self.test_metrics = metrics.clone(prefix='test_')

        self.val_mae_original_scale = torchmetrics.MeanAbsoluteError()
        self.val_MeanAbsoluteError_Original_Scale = torchmetrics.MeanAbsoluteError()


    def forward(self, x: torch.Tensor) -> torch.Tensor:
@ -107,7 +108,8 @@ class LSTMForecastLightningModule(pl.LightningModule):
            x: Input tensor of shape (batch_size, sequence_length, input_size)

        Returns:
            Predictions tensor of shape (batch_size, forecast_horizon)
            Predictions tensor of shape (batch_size, len(forecast_horizons)),
            where each element corresponds to a predicted horizon in sorted order.
        """
        # LSTM forward pass
        lstm_out, (hidden, cell) = self.lstm(x)  # Shape: (batch, seq_len, hidden_size)
@ -126,59 +128,50 @@
            last_time_step_out = last_time_step_out + residual

        # Final fully connected layer
        predictions = self.fc(last_time_step_out)  # Shape: (batch_size, output_size/horizon)
        predictions = self.fc(last_time_step_out)  # Shape: (batch_size, output_size == len(horizons))

        return predictions  # Shape: (batch_size, forecast_horizon)
        return predictions  # Shape: (batch_size, len(forecast_horizons))

    def _calculate_loss(self, outputs, targets):
        # Ensure shapes match before loss calculation
        # Shapes should now be (batch_size, len(horizons)) for both
        if outputs.shape != targets.shape:
            # Squeeze potential extra dim: (batch, horizon, 1) -> (batch, horizon)
            if outputs.ndim == targets.ndim + 1 and outputs.shape[-1] == 1:
                outputs = outputs.squeeze(-1)
            if outputs.shape != targets.shape:
                raise ValueError(f"Output shape {outputs.shape} doesn't match target shape {targets.shape} for loss calculation.")
            # Minimal check; the dataset __getitem__ should ensure this
            raise ValueError(f"Output shape {outputs.shape} doesn't match target shape {targets.shape} for loss calculation.")
        return self.criterion(outputs, targets)

    def _inverse_transform(self, data: torch.Tensor) -> Optional[torch.Tensor]:
        """Helper to inverse transform data using the stored target scaler."""
        """Helper to inverse transform data (preds or targets) using the stored target scaler."""
        if self.target_scaler is None:
            # logger.warning("Cannot inverse transform: target_scaler not available.")
            return None  # Cannot inverse transform
            return None

        data_cpu = data.detach().cpu().numpy().astype(np.float64)
        original_shape = data_cpu.shape  # e.g., (batch_size, len(horizons))
        num_elements = data_cpu.size

        # The scaler expects 2D input (N, 1)
        # Ensure data is on CPU and is float64, as sklearn scalers typically expect
        data_cpu = data.detach().cpu().numpy().astype(np.float64)
        original_shape = data_cpu.shape
        if data_cpu.ndim == 1:
            data_flat = data_cpu.reshape(-1, 1)
        elif data_cpu.ndim == 2:  # (batch, horizon)
            data_flat = data_cpu.reshape(-1, 1)
        else:
            logger.warning(f"Unexpected shape for inverse transform: {original_shape}. Reshaping to (-1, 1).")
            data_flat = data_cpu.reshape(-1, 1)
        data_flat = data_cpu.reshape(num_elements, 1)

        try:
            inversed_np = self.target_scaler.inverse_transform(data_flat)
            # Return as a tensor on the original device
            # Return as a tensor on the original device, reshaped
            inversed_tensor = torch.from_numpy(inversed_np).float().to(data.device)
            # Reshape back? Or keep flat? Keep flat for direct metric use often.
            return inversed_tensor.flatten()
            # return inversed_tensor.reshape(original_shape)  # If the original shape is needed
            # Reshape back to the original multi-horizon shape
            return inversed_tensor.reshape(original_shape)
            # return inversed_tensor.flatten()  # Keep flat if needed for specific metric inputs
        except Exception as e:
            logger.error(f"Failed to inverse transform data: {e}", exc_info=True)
            return None  # Return None if the inverse transform fails
            return None


    def training_step(self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int) -> torch.Tensor:
        x, y = batch  # Shapes: x=(batch, seq_len, features), y=(batch, horizon)
        outputs = self(x)  # Scaled outputs: (batch, horizon)
        x, y = batch  # Shapes: x=(batch, seq_len, features), y=(batch, len(horizons))
        outputs = self(x)  # Scaled outputs: (batch, len(horizons))
        loss = self._calculate_loss(outputs, y)

        # Log scaled metrics
        metrics = self.train_metrics(outputs, y)  # Update internal state
        self.train_metrics.update(outputs, y)
        self.log('train_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log_dict(self.train_metrics, on_step=False, on_epoch=True, logger=True)  # Log all metrics in the collection
        self.log_dict(self.train_metrics, on_step=False, on_epoch=True, logger=True)

        return loss

@ -188,20 +181,22 @@
        loss = self._calculate_loss(outputs, y)

        # Log scaled metrics
        metrics = self.val_metrics(outputs, y)  # Update internal state
        self.val_metrics.update(outputs, y)
        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log_dict(self.val_metrics, on_step=False, on_epoch=True, logger=True)

        # Log MAE on the ORIGINAL scale if a scaler is available (often the primary metric for checkpointing/Optuna)
        # Log MAE on the ORIGINAL scale (primary metric for checkpoints)
        if self.target_scaler is not None:
            # The inverse transform keeps the (batch, len(horizons)) shape
            outputs_inv = self._inverse_transform(outputs)
            y_inv = self._inverse_transform(y)

            if outputs_inv is not None and y_inv is not None:
                # Ensure shapes are compatible (flattened by _inverse_transform)
                # Ensure shapes match
                if outputs_inv.shape == y_inv.shape:
                    self.val_mae_original_scale.update(outputs_inv, y_inv)
                    self.log('val_mae_orig_scale', self.val_mae_original_scale, on_step=False, on_epoch=True, prog_bar=True, logger=True)
                    # Computes the average MAE across all elements if multi-dimensional
                    self.val_MeanAbsoluteError_Original_Scale.update(outputs_inv, y_inv)
                    self.log('val_MeanAbsoluteError_Original_Scale', self.val_MeanAbsoluteError_Original_Scale, on_step=False, on_epoch=True, prog_bar=True, logger=True)
                else:
                    logger.warning(f"Shape mismatch after inverse transform in validation: Preds {outputs_inv.shape}, Targets {y_inv.shape}")
            else:
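
# --- Illustrative standalone sketch of the reshape round-trip in _inverse_transform
# above (assumes a fitted sklearn scaler; values are hypothetical) ---
import numpy as np
import torch
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(np.array([[0.0], [10.0]]))    # toy target scaler
batch = torch.tensor([[0.0, 1.0, -1.0], [0.5, -0.5, 2.0]])  # (batch=2, len(horizons)=3), scaled
flat = batch.numpy().reshape(-1, 1)                          # the scaler expects (N, 1)
inv = scaler.inverse_transform(flat).reshape(tuple(batch.shape))  # back to (batch, len(horizons))
print(inv.shape)  # (2, 3)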
|
@ -5,7 +5,7 @@ This package contains configuration models, helper functions, and other utilitie
|
||||
"""
|
||||
|
||||
# Expose configuration models
|
||||
from .config_model import (
|
||||
from .forecast_config_model import (
|
||||
MainConfig,
|
||||
DataConfig,
|
||||
FeatureConfig,
|
||||
|
@ -44,7 +44,7 @@ class DataConfig(BaseModel):
|
||||
class FeatureConfig(BaseModel):
|
||||
"""Configuration for feature engineering and preprocessing."""
|
||||
sequence_length: int = Field(..., gt=0)
|
||||
forecast_horizon: int = Field(..., gt=0)
|
||||
forecast_horizon: List[int] = Field(..., min_length=1, description="List of specific forecast horizons to predict (e.g., [1, 6, 12]).")
|
||||
lags: List[int] = []
|
||||
rolling_window_sizes: List[int] = []
|
||||
use_time_features: bool = True
|
||||
@ -55,11 +55,11 @@ class FeatureConfig(BaseModel):
|
||||
clipping: ClippingConfig = ClippingConfig() # Default instance
|
||||
scaling_method: Optional[Literal['standard', 'minmax']] = 'standard' # Added literal validation
|
||||
|
||||
@field_validator('lags', 'rolling_window_sizes')
|
||||
@field_validator('lags', 'rolling_window_sizes', 'forecast_horizon')
|
||||
@classmethod
|
||||
def check_positive_list_values(cls, v: List[int]) -> List[int]:
|
||||
if any(val <= 0 for val in v):
|
||||
raise ValueError('Lists lags/rolling_window_sizes must contain only positive values')
|
||||
raise ValueError('Lists lags, rolling_window_sizes, and forecast_horizon must contain only positive values')
|
||||
return v
|
||||
|
||||
class ModelConfig(BaseModel):
|
||||
@ -69,8 +69,8 @@ class ModelConfig(BaseModel):
|
||||
num_layers: int = Field(..., gt=0)
|
||||
dropout: float = Field(..., ge=0.0, le=1.0)
|
||||
use_residual_skips: bool = False
|
||||
# Add forecast_horizon here to ensure LightningModule gets it directly
|
||||
forecast_horizon: Optional[int] = Field(None, gt=0) # Will be set from FeatureConfig
|
||||
# forecast_horizon: Optional[int] = Field(None, gt=0) # OLD
|
||||
forecast_horizon: Optional[List[int]] = Field(None, min_length=1) # Will be set from FeatureConfig
|
||||
|
||||
class TrainingConfig(BaseModel):
|
||||
"""Configuration for the training process (PyTorch Lightning)."""
|
||||
@ -103,26 +103,35 @@ class EvaluationConfig(BaseModel):
|
||||
class OptunaConfig(BaseModel):
|
||||
"""Optional configuration for Optuna hyperparameter optimization."""
|
||||
enabled: bool = False
|
||||
study_name: str = "default_study" # Added study_name
|
||||
n_trials: int = Field(20, gt=0)
|
||||
storage: Optional[str] = None # e.g., "sqlite:///output/hpo_results/study.db"
|
||||
direction: Literal['minimize', 'maximize'] = 'minimize'
|
||||
metric_to_optimize: str = 'val_mae_orig_scale'
|
||||
pruning: bool = True
|
||||
metric_to_optimize: str = 'val_MeanAbsoluteError_Original_Scale' # Updated default metric
|
||||
pruning: bool = True
|
||||
|
||||
# --- Top-Level Configuration Model ---
|
||||
|
||||
class MainConfig(BaseModel):
    """Main configuration model nesting all sections."""
    project_name: str = "TimeSeriesForecasting"
    random_seed: Optional[int] = 42 # Added top-level seed
    random_seed: Optional[int] = 42
    log_level: Literal['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] = 'INFO'
    output_dir: str = Field("output/cv_results", description="Base directory for saving all outputs (results, logs, models, plots).")

    # --- Execution Control ---
    run_cross_validation: bool = Field(True, description="Run the main cross-validation training loop?")
    run_classic_training: bool = Field(True, description="Run a single classic train/val/test split training?")
    run_ensemble_evaluation: bool = Field(True, description="Run ensemble evaluation using CV fold models?")
    # --- End Execution Control ---

    data: DataConfig
    features: FeatureConfig
    model: ModelConfig # ModelConfig no longer contains input_size
    model: ModelConfig
    training: TrainingConfig
    cross_validation: CrossValidationConfig
    evaluation: EvaluationConfig
    optuna: Optional[OptunaConfig] = OptunaConfig() # Added optional Optuna config
    optuna: Optional[OptunaConfig] = OptunaConfig()

    @model_validator(mode='after')
    def check_forecast_horizon_consistency(self) -> 'MainConfig':
@ -131,20 +140,33 @@ class MainConfig(BaseModel):
        if self.model.forecast_horizon is None:
            # If model config doesn't have it, set it from features config
            self.model.forecast_horizon = self.features.forecast_horizon
        elif self.model.forecast_horizon != self.features.forecast_horizon:
        elif set(self.model.forecast_horizon) != set(self.features.forecast_horizon): # Compare sets for content equality
            # If both are set but differ, raise error
            raise ValueError(
                f"ModelConfig forecast_horizon ({self.model.forecast_horizon}) must match "
                f"FeatureConfig forecast_horizon ({self.features.forecast_horizon})."
            )
        # After potential setting, ensure model.forecast_horizon is actually set
        if self.model and (self.model.forecast_horizon is None or self.model.forecast_horizon <= 0):
            raise ValueError("ModelConfig requires a positive forecast_horizon (must be set in features config if not set explicitly in model config).")
        # After potential setting, ensure model.forecast_horizon is actually set and valid
        if self.model and (
            self.model.forecast_horizon is None or
            not isinstance(self.model.forecast_horizon, list) or # Check type
            len(self.model.forecast_horizon) == 0 or # Check not empty
            any(h <= 0 for h in self.model.forecast_horizon) # Check positive values
        ):
            raise ValueError("ModelConfig requires a non-empty list of positive forecast_horizon values (must be set in features config if not set explicitly in model config).")

        # Input size check is removed as it's not part of static config anymore

        return self

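A distilled, runnable restatement of the rule this validator enforces (a standalone sketch for illustration, not the Pydantic code itself):

    def check_horizons(model_h, feature_h):
        # Inherit from FeatureConfig when ModelConfig leaves the field unset
        if model_h is None:
            model_h = feature_h
        elif set(model_h) != set(feature_h):
            raise ValueError(f"ModelConfig forecast_horizon ({model_h}) must match "
                             f"FeatureConfig forecast_horizon ({feature_h}).")
        if not isinstance(model_h, list) or not model_h or any(h <= 0 for h in model_h):
            raise ValueError("forecast_horizon must be a non-empty list of positive integers.")
        return model_h

    assert check_horizons(None, [1, 6, 12]) == [1, 6, 12]        # inherited from features
    assert check_horizons([12, 6, 1], [1, 6, 12]) == [12, 6, 1]  # order-insensitive match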
    @model_validator(mode='after')
    def check_execution_flags(self) -> 'MainConfig':
        if not self.run_cross_validation and not self.run_classic_training:
            raise ValueError("At least one of 'run_cross_validation' or 'run_classic_training' must be True.")
        if self.run_ensemble_evaluation and not self.run_cross_validation:
            raise ValueError("'run_ensemble_evaluation' requires 'run_cross_validation' to be True (needs CV fold models).")
        return self

    class Config:
        # Example configuration for Pydantic itself
        validate_assignment = True # Re-validate on assignment
forecasting_model/utils/helper.py (new file, 173 lines)
@ -0,0 +1,173 @@
import argparse
import json
import logging
import random
from pathlib import Path
from typing import Optional, List, Dict

import numpy as np
import pandas as pd
import torch

import yaml

from forecasting_model import MainConfig

# Module-level logger (not the root logger)
logger = logging.getLogger(__name__)


def parse_arguments():
    """Parses command-line arguments."""
    parser = argparse.ArgumentParser(
        description="Run the Time Series Forecasting training pipeline using a configuration file.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        '-c', '--config',
        type=str,
        default='config.yaml',
        help="Path to the YAML configuration file."
    )
    # Removed seed, debug, and output-dir arguments

    args = parser.parse_args()
    return args


def load_config(config_path: Path) -> MainConfig:
    """
    Load and validate configuration from YAML file using Pydantic.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        Validated MainConfig object.

    Raises:
        FileNotFoundError: If the config file doesn't exist.
        yaml.YAMLError: If the file is not valid YAML.
        pydantic.ValidationError: If the config doesn't match the schema.
    """
    if not config_path.is_file():
        logger.error(f"Configuration file not found at: {config_path}")
        raise FileNotFoundError(f"Config file not found: {config_path}")

    logger.info(f"Loading configuration from: {config_path}")
    try:
        with open(config_path, 'r') as f:
            config_dict = yaml.safe_load(f)

        # Validate configuration using Pydantic model
        config = MainConfig(**config_dict)
        logger.info("Configuration loaded and validated successfully.")
        return config
    except yaml.YAMLError as e:
        logger.error(f"Error parsing YAML file {config_path}: {e}", exc_info=True)
        raise
    except Exception as e:  # Catches Pydantic validation errors too
        logger.error(f"Error validating configuration {config_path}: {e}", exc_info=True)
        raise


def set_seeds(seed: Optional[int] = 42) -> None:
    """
    Set random seeds for reproducibility across libraries.

    Args:
        seed: The seed value to use. If None, uses default 42.
    """
    actual_seed = seed if seed is not None else 42
    if seed is None:
        logger.warning(f"No random_seed specified in config, using default seed: {actual_seed}")
    else:
        logger.info(f"Setting random seed from config: {actual_seed}")

    random.seed(actual_seed)
    np.random.seed(actual_seed)
    torch.manual_seed(actual_seed)
    # Ensure reproducibility for CUDA operations where possible
    if torch.cuda.is_available():
        torch.cuda.manual_seed(actual_seed)
        torch.cuda.manual_seed_all(actual_seed)  # For multi-GPU
        # These settings can slow down training but improve reproducibility
        # torch.backends.cudnn.deterministic = True
        # torch.backends.cudnn.benchmark = False
    # PyTorch Lightning seeding (optional, as we seed torch directly)
    # pl.seed_everything(seed, workers=True)  # workers=True ensures dataloader reproducibility


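Taken together, the three helpers above form the usual entry-point preamble. A minimal wiring sketch (illustrative; assumes the script is run where forecasting_model is importable):

    if __name__ == "__main__":
        args = parse_arguments()
        config = load_config(Path(args.config))
        set_seeds(config.random_seed)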
def aggregate_cv_metrics(all_fold_metrics: List[Dict[str, float]]) -> Dict[str, Dict[str, float]]:
    """
    Calculate mean and standard deviation of metrics across folds.
    Handles potential NaN values by ignoring them.

    Args:
        all_fold_metrics: A list where each element is a dictionary of
                          metrics for one fold (e.g., {'MAE': v1, 'RMSE': v2}).

    Returns:
        A dictionary where keys are metric names and values are dicts
        containing 'mean' and 'std' for that metric across folds.
        Example: {'MAE': {'mean': m, 'std': s}, 'RMSE': {'mean': m2, 'std': s2}}
    """
    if not all_fold_metrics:
        logger.warning("Received empty list for metric aggregation.")
        return {}

    aggregated: Dict[str, Dict[str, float]] = {}
    # Get metric names from the first valid fold's results
    first_valid_metrics = next((m for m in all_fold_metrics if m), None)
    if not first_valid_metrics:
        logger.warning("No valid fold metrics found for aggregation.")
        return {}
    metric_names = list(first_valid_metrics.keys())

    for metric in metric_names:
        # Collect values for this metric across all folds, ignoring NaNs
        values = [fold_metrics.get(metric) for fold_metrics in all_fold_metrics if fold_metrics and metric in fold_metrics]
        valid_values = [v for v in values if v is not None and not np.isnan(v)]

        if not valid_values:
            logger.warning(f"No valid values found for metric '{metric}' across folds.")
            mean_val = np.nan
            std_val = np.nan
        else:
            mean_val = float(np.mean(valid_values))
            std_val = float(np.std(valid_values))
            logger.debug(f"Aggregated '{metric}': Mean={mean_val:.4f}, Std={std_val:.4f} from {len(valid_values)} folds.")

        aggregated[metric] = {'mean': mean_val, 'std': std_val}

    return aggregated


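A small worked example of the aggregation, including a NaN that gets skipped (the values are made up for illustration):

    fold_metrics = [
        {"MAE": 1.0, "RMSE": 2.0},
        {"MAE": 1.2, "RMSE": float("nan")},  # NaN is ignored for RMSE
        {"MAE": 0.8, "RMSE": 2.4},
    ]
    summary = aggregate_cv_metrics(fold_metrics)
    # summary["MAE"]  -> {'mean': 1.0, 'std': ~0.163}  (population std over 3 folds)
    # summary["RMSE"] -> {'mean': 2.2, 'std': 0.2}     (computed from the 2 valid folds)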
def save_results(results: Dict, filename: Path):
    """Save dictionary results to a JSON file."""
    try:
        filename.parent.mkdir(parents=True, exist_ok=True)
        # Convert numpy types to native Python types for JSON serialization
        results_serializable = json.loads(json.dumps(results, cls=NumpyEncoder))
        with open(filename, 'w') as f:
            json.dump(results_serializable, f, indent=4)
        logger.info(f"Saved results to {filename}")
    except TypeError as e:
        logger.error(f"Serialization error saving results to {filename}. Check for non-serializable types (e.g., numpy types): {e}", exc_info=True)
    except Exception as e:
        logger.error(f"Failed to save results to {filename}: {e}", exc_info=True)


class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        elif isinstance(obj, (np.bool_, bool)):
            return bool(obj)
        elif pd.isna(obj):  # Handle pandas NaT or numpy NaN gracefully
            return None
        return super(NumpyEncoder, self).default(obj)
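Usage sketch for the pair above (the output path is illustrative): numpy scalars and arrays in the results dict become plain Python types before landing in the JSON file.

    results = {"MAE": np.float64(1.23), "per_fold": np.array([1.0, 2.0])}
    save_results(results, Path("output/example_metrics.json"))
    # Resulting JSON: {"MAE": 1.23, "per_fold": [1.0, 2.0]} (pretty-printed with indent=4)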