intermediate backup

2025-05-12 20:05:28 +02:00
parent 26ee0cd334
commit 399eae6481
12 changed files with 22 additions and 29 deletions

@@ -93,7 +93,7 @@ def perform_stationarity_tests(series: pd.Series) -> Tuple[Optional[Dict[str, An
         '#Lags Used',
         '#Observations Used',
         'Critical Values',
-        'IC Best' # Added by newer statsmodels
+        'IC Best'
     ]
     # Only map existing keys from result tuple
     results['adf'] = {key: val for key, val in zip(adf_keys, adf_test) if key != 'IC Best'}
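For orientation, a minimal standalone sketch of the key-mapping pattern this hunk adjusts (the helper name and the first two key labels are assumptions; only the tail of `adf_keys` is visible above):

    from statsmodels.tsa.stattools import adfuller
    import pandas as pd

    def adf_summary(series: pd.Series) -> dict:
        adf_test = adfuller(series.dropna(), autolag="AIC")
        adf_keys = ["ADF Statistic", "p-value", "#Lags Used",
                    "#Observations Used", "Critical Values", "IC Best"]
        # zip() stops at the shorter sequence, so result tuples without the
        # information-criterion value are handled too; 'IC Best' is dropped.
        return {key: val for key, val in zip(adf_keys, adf_test) if key != "IC Best"}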

entrix_case_callenge.zip (new binary file, contents not shown)

@@ -44,7 +44,7 @@ def load_fold_model_and_objects(
     # 2. Load Saved Objects using torch.load
     test_loader_path = fold_dir / "test_loader.pt"
     target_scaler_path = fold_dir / "target_scaler.pt"
-    data_scaler_path = fold_dir / "data_scaler.pt" # Added path for data_scaler
+    data_scaler_path = fold_dir / "data_scaler.pt"
     input_size_path = fold_dir / "input_size.pt"
     prediction_index_path = fold_dir / "prediction_target_time_index.pt"
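A hedged sketch of how such per-fold artifacts could be loaded back with `torch.load` (file names follow the hunk above; the helper and the `weights_only=False` flag for pickled non-tensor objects are assumptions about the surrounding code):

    from pathlib import Path
    import torch

    def load_fold_artifacts(fold_dir: Path) -> dict:
        def _load(name: str):
            # weights_only=False because scalers, loaders and indexes are plain
            # pickled Python objects, not tensor state dicts.
            return torch.load(fold_dir / name, weights_only=False)
        return {
            "test_loader": _load("test_loader.pt"),
            "target_scaler": _load("target_scaler.pt"),
            "data_scaler": _load("data_scaler.pt"),
            "input_size": _load("input_size.pt"),
            "prediction_index": _load("prediction_target_time_index.pt"),
        }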

@@ -95,7 +95,7 @@ class LSTMForecastLightningModule(pl.LightningModule):
         # --- Define Metrics (TorchMetrics) ---
         metrics = torchmetrics.MetricCollection([
             torchmetrics.MeanAbsoluteError(),
             torchmetrics.MeanSquaredError(squared=False) # RMSE
         ])
         self.train_metrics = metrics.clone(prefix='train_')
         self.val_metrics = metrics.clone(prefix='val_')
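A self-contained sketch of the metric setup above, outside the Lightning module (assumes `torchmetrics` is installed; the dummy tensors are illustrative):

    import torch
    import torchmetrics

    metrics = torchmetrics.MetricCollection([
        torchmetrics.MeanAbsoluteError(),
        torchmetrics.MeanSquaredError(squared=False),  # squared=False -> RMSE
    ])
    train_metrics = metrics.clone(prefix="train_")

    preds, target = torch.rand(8, 5), torch.rand(8, 5)
    print(train_metrics(preds, target))
    # e.g. {'train_MeanAbsoluteError': tensor(...), 'train_MeanSquaredError': tensor(...)}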

@@ -5,7 +5,7 @@ import torch
 from torch.utils.data import DataLoader
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from typing import Tuple, Optional, Union, Type
-import math # Add math import
+import math
 from forecasting_model.utils.dataset_splitter import TimeSeriesDataset
 from forecasting_model.utils.forecast_config_model import FeatureConfig, TrainingConfig, EvaluationConfig
@@ -15,8 +15,6 @@ from forecasting_model.utils.forecast_config_model import FeatureConfig, Trainin
 logger = logging.getLogger(__name__)

-# --- Data Loading ---

 # --- Feature Engineering ---
 def engineer_features(df: pd.DataFrame, target_col: str, feature_config: FeatureConfig) -> pd.DataFrame:
     """
@@ -95,7 +93,7 @@ def engineer_features(df: pd.DataFrame, target_col: str, feature_config: Feature
     features_df['cos_day'] = np.cos(2 * np.pi * seconds_past_midnight / seconds_in_day)

-    # 5. Wavelet Transform (Optional)
+    # 5. Wavelet Transform
     if feature_config.wavelet_transform and feature_config.wavelet_transform.apply:
         logger.warning("Wavelet feature engineering is specified but not implemented yet.")
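For context, a minimal sketch of the sin/cos day-of-time encoding that `cos_day` above belongs to (column and variable names follow the visible code; the index construction is illustrative):

    import numpy as np
    import pandas as pd

    idx = pd.date_range("2024-01-01", periods=48, freq="h")
    features_df = pd.DataFrame(index=idx)
    seconds_in_day = 24 * 60 * 60
    seconds_past_midnight = idx.hour * 3600 + idx.minute * 60 + idx.second
    # Map the time of day onto the unit circle so 23:59 and 00:00 end up close together.
    features_df["sin_day"] = np.sin(2 * np.pi * seconds_past_midnight / seconds_in_day)
    features_df["cos_day"] = np.cos(2 * np.pi * seconds_past_midnight / seconds_in_day)
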
@@ -237,9 +235,6 @@ def prepare_fold_data_and_loaders(
     # Also need history for the input sequence length and max target horizon
     max_horizon_needed = max(feature_config.forecast_horizon) if feature_config.forecast_horizon else 0
     # Max history needed is max of lookback for features OR (sequence_length + max_horizon - 1) for targets/inputs
-    # Correct logic: Need `sequence_length` history for input, and `max_horizon` steps *after* the train data for targets/evaluation.
-    # The slicing needs to ensure enough data *before* train_idx[0] for feature lookback *and* sequence_length.
-    # Max history *before* the start of the training set
     max_history_needed_before_train = max(max_lookback, feature_config.sequence_length)
     slice_start_idx = max(0, int(train_idx[0] - max_history_needed_before_train))
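A quick numeric illustration of the slicing logic kept above (all numbers are hypothetical):

    max_lookback = 168      # e.g. longest lag / rolling window, in steps
    sequence_length = 24    # model input window
    train_start = 500       # position of train_idx[0] in the full series

    max_history_needed_before_train = max(max_lookback, sequence_length)      # 168
    slice_start_idx = max(0, train_start - max_history_needed_before_train)   # 332
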
@@ -375,8 +370,6 @@ def prepare_fold_data_and_loaders(
     except ValueError as e:
         # This might happen if data cannot be safely cast (e.g., strings remain unexpectedly)
         logger.error(f"Failed to convert data arrays to float32 before creating Tensors: {e}", exc_info=True)
-        # Consider adding more debug info here if it fails, e.g.:
-        # logger.debug(f"Data types in train_df before conversion: \n{train_df.dtypes}")
         raise ValueError("Data could not be converted to numeric type (float32) for PyTorch.") from e
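A toy reproduction of the failure mode this except-block guards against (the frame contents are illustrative; an object-dtype column makes the float32 cast raise `ValueError`):

    import numpy as np
    import pandas as pd

    train_df = pd.DataFrame({"target": [1.0, 2.0], "flag": ["a", "b"]})  # stray string column
    try:
        arr = train_df.to_numpy(dtype=np.float32)
    except ValueError as e:
        print(f"Failed to convert data arrays to float32: {e}")
        print(train_df.dtypes)  # the kind of debug output the removed comment hinted at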

@@ -48,7 +48,7 @@ def evaluate_fold_predictions(
         y_true_scaled: Numpy array of scaled ground truth targets (n_samples, len(horizons)).
         y_pred_scaled: Numpy array of scaled model predictions (n_samples, len(horizons)).
         target_scaler: The scaler fitted on the target variable. Used for inverse transform.
-        data_scaler: The scaler fitted on the input features (kept for potential future use or context, not used in current calculations). # ADDED Docstring
+        data_scaler: The scaler fitted on the input features (kept for potential future use or context, not used in current calculations).
         eval_config: Configuration object for evaluation parameters.
         fold_num: The current fold number (zero-based or -1 for classic).
         output_dir: The base directory to save outputs.
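A hedged sketch of the inverse transform mentioned for `target_scaler` above: a scaler fitted on a single target column can be applied to the (n_samples, len(horizons)) arrays by flattening to one column (shapes and data are illustrative):

    import numpy as np
    from sklearn.preprocessing import StandardScaler

    target_scaler = StandardScaler().fit(np.random.rand(100, 1))  # fitted on the target only
    y_pred_scaled = np.random.randn(32, 3)                        # (n_samples, len(horizons))
    y_pred = target_scaler.inverse_transform(
        y_pred_scaled.reshape(-1, 1)
    ).reshape(y_pred_scaled.shape)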

@@ -47,12 +47,12 @@ class FeatureConfig(BaseModel):
     lags: List[int] = []
     rolling_window_sizes: List[int] = []
     use_time_features: bool = True
-    sinus_curve: bool = False # Added
+    sinus_curve: bool = False
-    cosine_curve: bool = False # Added
+    cosine_curve: bool = False
     wavelet_transform: Optional[WaveletTransformConfig] = None
-    fill_nan: Optional[Union[str, float, int]] = 'ffill' # Added (e.g., 'ffill', 0)
+    fill_nan: Optional[Union[str, float, int]] = 'ffill'
     clipping: ClippingConfig = ClippingConfig() # Default instance
-    scaling_method: Optional[Literal['standard', 'minmax']] = 'standard' # Added literal validation
+    scaling_method: Optional[Literal['standard', 'minmax']] = 'standard'

     @field_validator('lags', 'rolling_window_sizes', 'forecast_horizon')
     def check_positive_list_values(cls, v: List[int]) -> List[int]:
@@ -81,9 +81,9 @@ class TrainingConfig(BaseModel):
     early_stopping_patience: Optional[int] = Field(None, ge=1) # Patience must be >= 1 if set
     scheduler_step_size: Optional[int] = Field(None, gt=0)
     scheduler_gamma: Optional[float] = Field(None, gt=0.0, lt=1.0)
-    gradient_clip_val: Optional[float] = Field(None, ge=0.0) # Added
+    gradient_clip_val: Optional[float] = Field(None, ge=0.0)
-    num_workers: int = Field(0, ge=0) # Added
+    num_workers: int = Field(0, ge=0)
-    precision: Literal[16, 32, 64, 'bf16'] = 32 # Added
+    precision: Literal[16, 32, 64, 'bf16'] = 32

 class CrossValidationConfig(BaseModel):
     """Configuration for time series cross-validation."""
@@ -102,7 +102,7 @@ class EvaluationConfig(BaseModel):
 class OptunaConfig(BaseModel):
     """Optional configuration for Optuna hyperparameter optimization."""
     enabled: bool = False
-    study_name: str = "default_study" # Added study_name
+    study_name: str = "default_study"
     n_trials: int = Field(20, gt=0)
     storage: Optional[str] = None # e.g., "sqlite:///output/hpo_results/study.db"
     direction: Literal['minimize', 'maximize'] = 'minimize'
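For readers unfamiliar with the validation style used in these models, a self-contained pydantic v2 sketch of the constrained fields shown above (the class name and the chosen subset of fields are illustrative, not the project's full `TrainingConfig`):

    from typing import Literal, Optional
    from pydantic import BaseModel, Field

    class TrainingConfigSketch(BaseModel):
        early_stopping_patience: Optional[int] = Field(None, ge=1)
        gradient_clip_val: Optional[float] = Field(None, ge=0.0)
        num_workers: int = Field(0, ge=0)
        precision: Literal[16, 32, 64, 'bf16'] = 32

    print(TrainingConfigSketch(gradient_clip_val=0.5, precision='bf16'))
    # TrainingConfigSketch(gradient_clip_val=-1.0) would raise a ValidationError (ge=0.0)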

@@ -98,15 +98,15 @@ def run_single_fold(
     # Variables to hold prediction results for plotting later
     all_preds_scaled: Optional[np.ndarray] = None
    all_targets_scaled: Optional[np.ndarray] = None
-    target_scaler: Optional[Union[StandardScaler, MinMaxScaler]] = None # Need to keep scaler reference
+    target_scaler: Optional[Union[StandardScaler, MinMaxScaler]] = None
-    data_scaler: Optional[Union[StandardScaler, MinMaxScaler]] = None # Added to keep data scaler reference
+    data_scaler: Optional[Union[StandardScaler, MinMaxScaler]] = None
     prediction_target_time_index_h1: Optional[pd.DatetimeIndex] = None
     pl_logger = None

     # Variables to store paths of saved artifacts
     saved_model_path: Optional[Path] = None
     saved_target_scaler_path: Optional[Path] = None
-    saved_data_scaler_path: Optional[Path] = None # Added
+    saved_data_scaler_path: Optional[Path] = None
     saved_input_size_path: Optional[Path] = None
     saved_config_path: Optional[Path] = None

@@ -167,7 +167,7 @@ if __name__ == "__main__":
         model_instance=loaded_artifact_info['model_instance'],
         feature_config=provider_feature_config, # Pass config for seq_len, horizons
         target_scaler=provider_target_scaler, # Pass the loaded target scaler
-        data_scaler=provider_data_scaler # --- >>> NEW: Pass the loaded data scaler <<< ---
+        data_scaler=provider_data_scaler
     )
     # Basic validation
     if 1 not in provider_instance.get_forecast_horizons():

@@ -1,15 +1,15 @@
 import argparse
 import logging
 import sys
-import warnings # Import the warnings module
+import warnings
-import copy # For deep copying config
+import copy
 from pathlib import Path
 import time

 import pandas as pd
 import torch
-import yaml # Added for saving best config
+import yaml
 import optuna
 import pytorch_lightning as pl

@@ -362,7 +362,7 @@ def run_hpo():
         logger.setLevel(effective_log_level)
         logger.info(f"Set log level to {level_name}. Logging HPO run to console and {log_file}")
         if effective_log_level <= logging.DEBUG: logger.debug("Debug logging enabled.")
-    except (AttributeError, ValueError, TypeError) as e: # Added TypeError
+    except (AttributeError, ValueError, TypeError) as e:
         logger.warning(f"Could not set log level from config: {e}. Defaulting to INFO.")
         logger.setLevel(logging.INFO)
         # Still try to log to a default file if possible
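
The three exception types caught above suggest the level is looked up by name from the config; a hypothetical sketch of such a lookup (the actual resolution code is not part of this hunk):

    import logging

    def resolve_level(level_name) -> int:
        # Unknown names (e.g. "VERBOSE") raise AttributeError; a non-string value
        # (e.g. None from a missing config key) raises TypeError inside getattr.
        level = getattr(logging, level_name)
        if not isinstance(level, int):  # e.g. level_name == "getLogger"
            raise ValueError(f"Invalid log level: {level_name!r}")
        return level

    logging.getLogger("hpo").setLevel(resolve_level("INFO"))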