intermediate backup

2025-05-12 20:05:28 +02:00
parent 26ee0cd334
commit 399eae6481
12 changed files with 22 additions and 29 deletions

View File

View File

@@ -93,7 +93,7 @@ def perform_stationarity_tests(series: pd.Series) -> Tuple[Optional[Dict[str, An
'#Lags Used',
'#Observations Used',
'Critical Values',
-'IC Best' # Added by newer statsmodels
+'IC Best'
]
# Only map existing keys from result tuple
results['adf'] = {key: val for key, val in zip(adf_keys, adf_test) if key != 'IC Best'}
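
Aside: a minimal sketch of the key-mapping pattern this hunk keeps, assuming statsmodels' adfuller; zip() stops at the shorter sequence, so the mapping tolerates result tuples with or without the trailing IC field, and the comprehension drops 'IC Best' for a stable summary shape.

    from statsmodels.tsa.stattools import adfuller
    import pandas as pd

    def adf_summary(series: pd.Series) -> dict:
        # autolag='AIC' makes adfuller append the best IC value to the result tuple
        adf_test = adfuller(series.dropna(), autolag='AIC')
        adf_keys = ['Test Statistic', 'p-value', '#Lags Used',
                    '#Observations Used', 'Critical Values', 'IC Best']
        # zip() truncates to the shorter input, so a missing trailing field is safe
        return {key: val for key, val in zip(adf_keys, adf_test) if key != 'IC Best'}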

BIN
entrix_case_callenge.zip Normal file

Binary file not shown.

View File

@@ -44,7 +44,7 @@ def load_fold_model_and_objects(
# 2. Load Saved Objects using torch.load
test_loader_path = fold_dir / "test_loader.pt"
target_scaler_path = fold_dir / "target_scaler.pt"
-data_scaler_path = fold_dir / "data_scaler.pt" # Added path for data_scaler
+data_scaler_path = fold_dir / "data_scaler.pt"
input_size_path = fold_dir / "input_size.pt"
prediction_index_path = fold_dir / "prediction_target_time_index.pt"
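
Aside: a hedged sketch of how these artifact paths might be loaded, assuming they were written with torch.save; the helper name and the up-front existence check are illustrative, not the repository's code.

    from pathlib import Path
    import torch

    def load_fold_artifacts(fold_dir: Path) -> dict:
        # Hypothetical helper: gather all per-fold artifact paths in one place
        paths = {
            'test_loader': fold_dir / "test_loader.pt",
            'target_scaler': fold_dir / "target_scaler.pt",
            'data_scaler': fold_dir / "data_scaler.pt",
            'input_size': fold_dir / "input_size.pt",
            'prediction_index': fold_dir / "prediction_target_time_index.pt",
        }
        missing = [name for name, path in paths.items() if not path.exists()]
        if missing:
            raise FileNotFoundError(f"Missing fold artifacts in {fold_dir}: {missing}")
        # weights_only=False (the pre-2.6 default) because scalers and loaders
        # are arbitrary pickled objects, not bare tensors
        return {name: torch.load(path, weights_only=False) for name, path in paths.items()}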

View File

@@ -95,7 +95,7 @@ class LSTMForecastLightningModule(pl.LightningModule):
# --- Define Metrics (TorchMetrics) ---
metrics = torchmetrics.MetricCollection([
torchmetrics.MeanAbsoluteError(),
-torchmetrics.MeanSquaredError(squared=False) # RMSE
+torchmetrics.MeanSquaredError(squared=False) # RMSE
])
self.train_metrics = metrics.clone(prefix='train_')
self.val_metrics = metrics.clone(prefix='val_')
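
Aside: a minimal usage sketch of this metrics setup, assuming a recent torchmetrics where MetricCollection.clone(prefix=...) is available; squared=False turns MeanSquaredError into RMSE.

    import torch
    import torchmetrics

    metrics = torchmetrics.MetricCollection([
        torchmetrics.MeanAbsoluteError(),
        torchmetrics.MeanSquaredError(squared=False),  # reports RMSE
    ])
    train_metrics = metrics.clone(prefix='train_')

    preds = torch.tensor([2.5, 0.0, 2.0])
    target = torch.tensor([3.0, -0.5, 2.0])
    # Returns e.g. {'train_MeanAbsoluteError': ..., 'train_MeanSquaredError': ...}
    print(train_metrics(preds, target))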

View File

@@ -5,7 +5,7 @@ import torch
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from typing import Tuple, Optional, Union, Type
-import math # Add math import
+import math
from forecasting_model.utils.dataset_splitter import TimeSeriesDataset
from forecasting_model.utils.forecast_config_model import FeatureConfig, TrainingConfig, EvaluationConfig
@@ -15,8 +15,6 @@ from forecasting_model.utils.forecast_config_model import FeatureConfig, Trainin
logger = logging.getLogger(__name__)
-# --- Data Loading ---
# --- Feature Engineering ---
def engineer_features(df: pd.DataFrame, target_col: str, feature_config: FeatureConfig) -> pd.DataFrame:
"""
@@ -95,7 +93,7 @@ def engineer_features(df: pd.DataFrame, target_col: str, feature_config: Feature
features_df['cos_day'] = np.cos(2 * np.pi * seconds_past_midnight / seconds_in_day)
-# 5. Wavelet Transform (Optional)
+# 5. Wavelet Transform
if feature_config.wavelet_transform and feature_config.wavelet_transform.apply:
logger.warning("Wavelet feature engineering is specified but not implemented yet.")
@@ -237,9 +235,6 @@ def prepare_fold_data_and_loaders(
# Also need history for the input sequence length and max target horizon
max_horizon_needed = max(feature_config.forecast_horizon) if feature_config.forecast_horizon else 0
-# Max history needed is max of lookback for features OR (sequence_length + max_horizon - 1) for targets/inputs
-# Correct logic: Need `sequence_length` history for input, and `max_horizon` steps *after* the train data for targets/evaluation.
-# The slicing needs to ensure enough data *before* train_idx[0] for feature lookback *and* sequence_length.
# Max history *before* the start of the training set
max_history_needed_before_train = max(max_lookback, feature_config.sequence_length)
slice_start_idx = max(0, int(train_idx[0] - max_history_needed_before_train))
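
Aside: a worked example of the slice-start computation, with illustrative numbers; max_lookback is assumed to cover the largest lag or rolling window in FeatureConfig.

    sequence_length = 24    # model input window (illustrative)
    max_lookback = 168      # largest lag or rolling window (illustrative)
    train_start = 100       # first integer position of the training split

    # History needed before the first training row: enough for feature lookback
    # and enough for one full input sequence, whichever is larger.
    max_history_needed_before_train = max(max_lookback, sequence_length)  # 168
    slice_start_idx = max(0, train_start - max_history_needed_before_train)  # 0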
@@ -375,8 +370,6 @@
except ValueError as e:
# This might happen if data cannot be safely cast (e.g., strings remain unexpectedly)
logger.error(f"Failed to convert data arrays to float32 before creating Tensors: {e}", exc_info=True)
-# Consider adding more debug info here if it fails, e.g.:
-# logger.debug(f"Data types in train_df before conversion: \n{train_df.dtypes}")
raise ValueError("Data could not be converted to numeric type (float32) for PyTorch.") from e

View File

@@ -48,7 +48,7 @@ def evaluate_fold_predictions(
y_true_scaled: Numpy array of scaled ground truth targets (n_samples, len(horizons)).
y_pred_scaled: Numpy array of scaled model predictions (n_samples, len(horizons)).
target_scaler: The scaler fitted on the target variable. Used for inverse transform.
-data_scaler: The scaler fitted on the input features (kept for potential future use or context, not used in current calculations). # ADDED Docstring
+data_scaler: The scaler fitted on the input features (kept for potential future use or context, not used in current calculations).
eval_config: Configuration object for evaluation parameters.
fold_num: The current fold number (zero-based or -1 for classic).
output_dir: The base directory to save outputs.
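
Aside: a hedged sketch of the inverse-transform step implied by this docstring, assuming target_scaler was fitted on a single target column; sklearn scalers expect 2-D input, hence the reshape round-trip.

    import numpy as np
    from sklearn.preprocessing import StandardScaler

    def unscale(y_scaled: np.ndarray, target_scaler: StandardScaler) -> np.ndarray:
        # (n_samples, n_horizons) -> one column, invert, restore original shape
        flat = y_scaled.reshape(-1, 1)
        return target_scaler.inverse_transform(flat).reshape(y_scaled.shape)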

View File

@@ -47,12 +47,12 @@ class FeatureConfig(BaseModel):
lags: List[int] = []
rolling_window_sizes: List[int] = []
use_time_features: bool = True
-sinus_curve: bool = False # Added
-cosine_curve: bool = False # Added
+sinus_curve: bool = False
+cosine_curve: bool = False
wavelet_transform: Optional[WaveletTransformConfig] = None
-fill_nan: Optional[Union[str, float, int]] = 'ffill' # Added (e.g., 'ffill', 0)
+fill_nan: Optional[Union[str, float, int]] = 'ffill'
clipping: ClippingConfig = ClippingConfig() # Default instance
-scaling_method: Optional[Literal['standard', 'minmax']] = 'standard' # Added literal validation
+scaling_method: Optional[Literal['standard', 'minmax']] = 'standard'
@field_validator('lags', 'rolling_window_sizes', 'forecast_horizon')
def check_positive_list_values(cls, v: List[int]) -> List[int]:
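
Aside: a plausible completion of the validator pattern shown above, assuming Pydantic v2's field_validator; the body and the class name are a sketch, not the repository's original.

    from typing import List
    from pydantic import BaseModel, field_validator

    class FeatureConfigSketch(BaseModel):
        forecast_horizon: List[int] = [1]
        lags: List[int] = []
        rolling_window_sizes: List[int] = []

        @field_validator('lags', 'rolling_window_sizes', 'forecast_horizon')
        @classmethod
        def check_positive_list_values(cls, v: List[int]) -> List[int]:
            if any(item <= 0 for item in v):
                raise ValueError('list entries must be positive integers')
            return v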
@@ -81,9 +81,9 @@
early_stopping_patience: Optional[int] = Field(None, ge=1) # Patience must be >= 1 if set
scheduler_step_size: Optional[int] = Field(None, gt=0)
scheduler_gamma: Optional[float] = Field(None, gt=0.0, lt=1.0)
-gradient_clip_val: Optional[float] = Field(None, ge=0.0) # Added
-num_workers: int = Field(0, ge=0) # Added
-precision: Literal[16, 32, 64, 'bf16'] = 32 # Added
+gradient_clip_val: Optional[float] = Field(None, ge=0.0)
+num_workers: int = Field(0, ge=0)
+precision: Literal[16, 32, 64, 'bf16'] = 32
class CrossValidationConfig(BaseModel):
"""Configuration for time series cross-validation."""
@@ -102,7 +102,7 @@
class OptunaConfig(BaseModel):
"""Optional configuration for Optuna hyperparameter optimization."""
enabled: bool = False
-study_name: str = "default_study" # Added study_name
+study_name: str = "default_study"
n_trials: int = Field(20, gt=0)
storage: Optional[str] = None # e.g., "sqlite:///output/hpo_results/study.db"
direction: Literal['minimize', 'maximize'] = 'minimize'
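
Aside: a hedged sketch of turning an OptunaConfig like this into a study via optuna.create_study; load_if_exists only matters when a persistent storage URL is set.

    from typing import Optional
    import optuna

    def make_study(study_name: str = "default_study",
                   storage: Optional[str] = None,
                   direction: str = "minimize") -> optuna.Study:
        return optuna.create_study(
            study_name=study_name,
            storage=storage,  # e.g. "sqlite:///output/hpo_results/study.db"
            direction=direction,
            load_if_exists=storage is not None,  # resume a persisted study
        )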

View File

@@ -98,15 +98,15 @@ def run_single_fold(
# Variables to hold prediction results for plotting later
all_preds_scaled: Optional[np.ndarray] = None
all_targets_scaled: Optional[np.ndarray] = None
-target_scaler: Optional[Union[StandardScaler, MinMaxScaler]] = None # Need to keep scaler reference
-data_scaler: Optional[Union[StandardScaler, MinMaxScaler]] = None # Added to keep data scaler reference
+target_scaler: Optional[Union[StandardScaler, MinMaxScaler]] = None
+data_scaler: Optional[Union[StandardScaler, MinMaxScaler]] = None
prediction_target_time_index_h1: Optional[pd.DatetimeIndex] = None
pl_logger = None
# Variables to store paths of saved artifacts
saved_model_path: Optional[Path] = None
saved_target_scaler_path: Optional[Path] = None
-saved_data_scaler_path: Optional[Path] = None # Added
+saved_data_scaler_path: Optional[Path] = None
saved_input_size_path: Optional[Path] = None
saved_config_path: Optional[Path] = None

View File

@@ -167,7 +167,7 @@ if __name__ == "__main__":
model_instance=loaded_artifact_info['model_instance'],
feature_config=provider_feature_config, # Pass config for seq_len, horizons
target_scaler=provider_target_scaler, # Pass the loaded target scaler
-data_scaler=provider_data_scaler # --- >>> NEW: Pass the loaded data scaler <<< ---
+data_scaler=provider_data_scaler
)
# Basic validation
if 1 not in provider_instance.get_forecast_horizons():

View File

@@ -1,15 +1,15 @@
import argparse
import logging
import sys
-import warnings # Import the warnings module
+import warnings
-import copy # For deep copying config
+import copy
from pathlib import Path
import time
import pandas as pd
import torch
-import yaml # Added for saving best config
+import yaml
import optuna
import pytorch_lightning as pl

View File

@@ -362,7 +362,7 @@ def run_hpo():
logger.setLevel(effective_log_level)
logger.info(f"Set log level to {level_name}. Logging HPO run to console and {log_file}")
if effective_log_level <= logging.DEBUG: logger.debug("Debug logging enabled.")
-except (AttributeError, ValueError, TypeError) as e: # Added TypeError
+except (AttributeError, ValueError, TypeError) as e:
logger.warning(f"Could not set log level from config: {e}. Defaulting to INFO.")
logger.setLevel(logging.INFO)
# Still try to log to a default file if possible
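
Aside: a minimal sketch of the level-parsing pattern this except clause guards, assuming the level name comes from a YAML config; str.upper() on a non-string raises TypeError, which is why it joins AttributeError and ValueError in the tuple.

    import logging

    logger = logging.getLogger("hpo")

    def set_log_level_from_config(level_name) -> None:
        try:
            # getattr raises AttributeError for unknown level names;
            # str.upper(...) raises TypeError for non-string input
            logger.setLevel(getattr(logging, str.upper(level_name)))
        except (AttributeError, ValueError, TypeError) as e:
            logger.warning(f"Could not set log level from config: {e}. Defaulting to INFO.")
            logger.setLevel(logging.INFO)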