intermediate backup
@@ -93,7 +93,7 @@ def perform_stationarity_tests(series: pd.Series) -> Tuple[Optional[Dict[str, An
         '#Lags Used',
         '#Observations Used',
         'Critical Values',
-        'IC Best' # Added by newer statsmodels
+        'IC Best'
     ]
     # Only map existing keys from result tuple
     results['adf'] = {key: val for key, val in zip(adf_keys, adf_test) if key != 'IC Best'}
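Note: for context, a minimal sketch of how the adfuller result tuple is usually mapped to these keys; the first two key names and the helper itself are illustrative, only the later keys appear in this diff. Newer statsmodels appends the IC value when autolag is active, which is why the zip-based mapping tolerates both tuple lengths.

    import pandas as pd
    from statsmodels.tsa.stattools import adfuller

    def adf_summary(series: pd.Series) -> dict:
        # adfuller returns (statistic, p-value, #lags, #obs, critical values[, icbest]);
        # the trailing icbest only exists when autolag is used.
        adf_test = adfuller(series.dropna(), autolag='AIC')
        adf_keys = ['Test Statistic', 'p-value', '#Lags Used',
                    '#Observations Used', 'Critical Values', 'IC Best']
        # zip() stops at the shorter sequence, so a missing 'IC Best' is simply skipped.
        return {key: val for key, val in zip(adf_keys, adf_test) if key != 'IC Best'}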
BIN entrix_case_callenge.zip (new file)
Binary file not shown.
@@ -44,7 +44,7 @@ def load_fold_model_and_objects(
     # 2. Load Saved Objects using torch.load
     test_loader_path = fold_dir / "test_loader.pt"
     target_scaler_path = fold_dir / "target_scaler.pt"
-    data_scaler_path = fold_dir / "data_scaler.pt" # Added path for data_scaler
+    data_scaler_path = fold_dir / "data_scaler.pt"
     input_size_path = fold_dir / "input_size.pt"
     prediction_index_path = fold_dir / "prediction_target_time_index.pt"

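Note: a hedged sketch of the loading step these paths feed into, assuming each object was written with torch.save; the helper name is illustrative, and on recent PyTorch releases pickled non-tensor objects may need weights_only=False.

    from pathlib import Path
    import torch

    def load_fold_artifacts(fold_dir: Path) -> dict:
        # Illustrative only: mirrors the file names used above.
        artifacts = {}
        for name in ("test_loader", "target_scaler", "data_scaler",
                     "input_size", "prediction_target_time_index"):
            path = fold_dir / f"{name}.pt"
            artifacts[name] = torch.load(path) if path.exists() else None
        return artifacts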
@@ -5,7 +5,7 @@ import torch
 from torch.utils.data import DataLoader
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from typing import Tuple, Optional, Union, Type
-import math # Add math import
+import math

 from forecasting_model.utils.dataset_splitter import TimeSeriesDataset
 from forecasting_model.utils.forecast_config_model import FeatureConfig, TrainingConfig, EvaluationConfig
@@ -15,8 +15,6 @@ from forecasting_model.utils.forecast_config_model import FeatureConfig, Trainin

 logger = logging.getLogger(__name__)

-# --- Data Loading ---
-
 # --- Feature Engineering ---
 def engineer_features(df: pd.DataFrame, target_col: str, feature_config: FeatureConfig) -> pd.DataFrame:
     """
@@ -95,7 +93,7 @@ def engineer_features(df: pd.DataFrame, target_col: str, feature_config: Feature
     features_df['cos_day'] = np.cos(2 * np.pi * seconds_past_midnight / seconds_in_day)


-    # 5. Wavelet Transform (Optional)
+    # 5. Wavelet Transform
     if feature_config.wavelet_transform and feature_config.wavelet_transform.apply:
         logger.warning("Wavelet feature engineering is specified but not implemented yet.")

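Note: for reference, a minimal sketch of the sin/cos day encoding shown above; the column names follow the diff, while the helper name and the DatetimeIndex assumption are illustrative.

    import numpy as np
    import pandas as pd

    def add_daily_cycle_features(features_df: pd.DataFrame) -> pd.DataFrame:
        # Encode time-of-day on the unit circle so 23:59 and 00:00 end up close together.
        idx = features_df.index  # assumed to be a pd.DatetimeIndex
        seconds_past_midnight = idx.hour * 3600 + idx.minute * 60 + idx.second
        seconds_in_day = 24 * 3600
        features_df['sin_day'] = np.sin(2 * np.pi * seconds_past_midnight / seconds_in_day)
        features_df['cos_day'] = np.cos(2 * np.pi * seconds_past_midnight / seconds_in_day)
        return features_df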
@@ -237,9 +235,6 @@ def prepare_fold_data_and_loaders(
     # Also need history for the input sequence length and max target horizon
     max_horizon_needed = max(feature_config.forecast_horizon) if feature_config.forecast_horizon else 0
     # Max history needed is max of lookback for features OR (sequence_length + max_horizon - 1) for targets/inputs
-    # Correct logic: Need `sequence_length` history for input, and `max_horizon` steps *after* the train data for targets/evaluation.
-    # The slicing needs to ensure enough data *before* train_idx[0] for feature lookback *and* sequence_length.
-    # Max history *before* the start of the training set
     max_history_needed_before_train = max(max_lookback, feature_config.sequence_length)

     slice_start_idx = max(0, int(train_idx[0] - max_history_needed_before_train))
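Note: a small numeric illustration of the slicing logic above (all values made up): with the longest feature lookback at 24 steps, a sequence length of 48 and the training window starting at row 100, the slice must begin 48 rows earlier, clipped at 0.

    # Illustrative numbers only.
    max_lookback = 24        # longest lag / rolling window in the feature config
    sequence_length = 48     # model input window
    train_start = 100        # train_idx[0]

    max_history_needed_before_train = max(max_lookback, sequence_length)      # 48
    slice_start_idx = max(0, train_start - max_history_needed_before_train)   # 52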
@@ -375,8 +370,6 @@ def prepare_fold_data_and_loaders(
     except ValueError as e:
         # This might happen if data cannot be safely cast (e.g., strings remain unexpectedly)
         logger.error(f"Failed to convert data arrays to float32 before creating Tensors: {e}", exc_info=True)
-        # Consider adding more debug info here if it fails, e.g.:
-        # logger.debug(f"Data types in train_df before conversion: \n{train_df.dtypes}")
         raise ValueError("Data could not be converted to numeric type (float32) for PyTorch.") from e


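Note: a hedged sketch of the conversion this except-branch guards, assuming the DataFrames hold only numeric columns by this point; the helper name is illustrative.

    import numpy as np
    import pandas as pd
    import torch

    def frame_to_tensor(df: pd.DataFrame) -> torch.Tensor:
        try:
            arr = df.to_numpy(dtype=np.float32)  # raises ValueError if non-numeric values remain
        except ValueError as e:
            raise ValueError("Data could not be converted to numeric type (float32) for PyTorch.") from e
        return torch.from_numpy(arr)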
@@ -48,7 +48,7 @@ def evaluate_fold_predictions(
         y_true_scaled: Numpy array of scaled ground truth targets (n_samples, len(horizons)).
         y_pred_scaled: Numpy array of scaled model predictions (n_samples, len(horizons)).
         target_scaler: The scaler fitted on the target variable. Used for inverse transform.
-        data_scaler: The scaler fitted on the input features (kept for potential future use or context, not used in current calculations). # ADDED Docstring
+        data_scaler: The scaler fitted on the input features (kept for potential future use or context, not used in current calculations).
         eval_config: Configuration object for evaluation parameters.
         fold_num: The current fold number (zero-based or -1 for classic).
         output_dir: The base directory to save outputs.
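Note: for context, a minimal sketch of how scaled targets and predictions are typically mapped back with the fitted target scaler before metrics are computed; the reshape handling is an assumption, since sklearn scalers expect 2-D input.

    import numpy as np
    from sklearn.preprocessing import StandardScaler

    def inverse_transform_targets(y_scaled: np.ndarray, target_scaler: StandardScaler) -> np.ndarray:
        # The scaler was fitted on a single target column, so flatten to (n, 1) and restore the shape.
        original_shape = y_scaled.shape
        flat = y_scaled.reshape(-1, 1)
        return target_scaler.inverse_transform(flat).reshape(original_shape)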
@@ -47,12 +47,12 @@ class FeatureConfig(BaseModel):
     lags: List[int] = []
     rolling_window_sizes: List[int] = []
     use_time_features: bool = True
-    sinus_curve: bool = False # Added
-    cosine_curve: bool = False # Added
+    sinus_curve: bool = False
+    cosine_curve: bool = False
     wavelet_transform: Optional[WaveletTransformConfig] = None
-    fill_nan: Optional[Union[str, float, int]] = 'ffill' # Added (e.g., 'ffill', 0)
+    fill_nan: Optional[Union[str, float, int]] = 'ffill'
     clipping: ClippingConfig = ClippingConfig() # Default instance
-    scaling_method: Optional[Literal['standard', 'minmax']] = 'standard' # Added literal validation
+    scaling_method: Optional[Literal['standard', 'minmax']] = 'standard'

     @field_validator('lags', 'rolling_window_sizes', 'forecast_horizon')
     def check_positive_list_values(cls, v: List[int]) -> List[int]:
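Note: a hedged sketch of what the positive-value validator referenced above usually looks like in Pydantic v2; the field and method names mirror the diff, the body is an assumption.

    from typing import List
    from pydantic import BaseModel, field_validator

    class FeatureConfigSketch(BaseModel):
        lags: List[int] = []
        rolling_window_sizes: List[int] = []
        forecast_horizon: List[int] = [1]

        @field_validator('lags', 'rolling_window_sizes', 'forecast_horizon')
        @classmethod
        def check_positive_list_values(cls, v: List[int]) -> List[int]:
            if any(item <= 0 for item in v):
                raise ValueError('all values must be positive integers')
            return v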
@@ -81,9 +81,9 @@ class TrainingConfig(BaseModel):
     early_stopping_patience: Optional[int] = Field(None, ge=1) # Patience must be >= 1 if set
     scheduler_step_size: Optional[int] = Field(None, gt=0)
     scheduler_gamma: Optional[float] = Field(None, gt=0.0, lt=1.0)
-    gradient_clip_val: Optional[float] = Field(None, ge=0.0) # Added
-    num_workers: int = Field(0, ge=0) # Added
-    precision: Literal[16, 32, 64, 'bf16'] = 32 # Added
+    gradient_clip_val: Optional[float] = Field(None, ge=0.0)
+    num_workers: int = Field(0, ge=0)
+    precision: Literal[16, 32, 64, 'bf16'] = 32

 class CrossValidationConfig(BaseModel):
     """Configuration for time series cross-validation."""
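Note: for context, a hedged sketch of how gradient_clip_val and precision typically reach the Lightning Trainer; the accepted precision values vary between Lightning versions, and the function is illustrative.

    import pytorch_lightning as pl

    def build_trainer(train_cfg) -> pl.Trainer:
        # train_cfg is assumed to be a TrainingConfig instance as defined in this diff.
        return pl.Trainer(
            gradient_clip_val=train_cfg.gradient_clip_val,  # None disables clipping
            precision=train_cfg.precision,                  # e.g. 32, 16 or 'bf16'
        )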
@@ -102,7 +102,7 @@ class EvaluationConfig(BaseModel):
 class OptunaConfig(BaseModel):
     """Optional configuration for Optuna hyperparameter optimization."""
     enabled: bool = False
-    study_name: str = "default_study" # Added study_name
+    study_name: str = "default_study"
     n_trials: int = Field(20, gt=0)
     storage: Optional[str] = None # e.g., "sqlite:///output/hpo_results/study.db"
     direction: Literal['minimize', 'maximize'] = 'minimize'
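Note: a minimal sketch of how this OptunaConfig is typically consumed; the objective here is a placeholder, only the config fields come from the diff.

    import optuna

    def run_study(cfg) -> optuna.Study:
        # cfg is assumed to be the OptunaConfig instance defined above.
        study = optuna.create_study(
            study_name=cfg.study_name,
            storage=cfg.storage,        # e.g. "sqlite:///output/hpo_results/study.db"
            direction=cfg.direction,    # 'minimize' or 'maximize'
            load_if_exists=True,
        )
        study.optimize(lambda trial: trial.suggest_float("x", -1.0, 1.0) ** 2,
                       n_trials=cfg.n_trials)
        return study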
@@ -98,15 +98,15 @@ def run_single_fold(
     # Variables to hold prediction results for plotting later
     all_preds_scaled: Optional[np.ndarray] = None
     all_targets_scaled: Optional[np.ndarray] = None
-    target_scaler: Optional[Union[StandardScaler, MinMaxScaler]] = None # Need to keep scaler reference
-    data_scaler: Optional[Union[StandardScaler, MinMaxScaler]] = None # Added to keep data scaler reference
+    target_scaler: Optional[Union[StandardScaler, MinMaxScaler]] = None
+    data_scaler: Optional[Union[StandardScaler, MinMaxScaler]] = None
     prediction_target_time_index_h1: Optional[pd.DatetimeIndex] = None
     pl_logger = None

     # Variables to store paths of saved artifacts
     saved_model_path: Optional[Path] = None
     saved_target_scaler_path: Optional[Path] = None
-    saved_data_scaler_path: Optional[Path] = None # Added
+    saved_data_scaler_path: Optional[Path] = None
     saved_input_size_path: Optional[Path] = None
     saved_config_path: Optional[Path] = None

@@ -167,7 +167,7 @@ if __name__ == "__main__":
         model_instance=loaded_artifact_info['model_instance'],
         feature_config=provider_feature_config, # Pass config for seq_len, horizons
         target_scaler=provider_target_scaler, # Pass the loaded target scaler
-        data_scaler=provider_data_scaler # --- >>> NEW: Pass the loaded data scaler <<< ---
+        data_scaler=provider_data_scaler
     )
     # Basic validation
     if 1 not in provider_instance.get_forecast_horizons():
@@ -1,15 +1,15 @@
 import argparse
 import logging
 import sys
-import warnings # Import the warnings module
+import warnings

-import copy # For deep copying config
+import copy
 from pathlib import Path
 import time

 import pandas as pd
 import torch
-import yaml # Added for saving best config
+import yaml

 import optuna
 import pytorch_lightning as pl
@@ -362,7 +362,7 @@ def run_hpo():
         logger.setLevel(effective_log_level)
         logger.info(f"Set log level to {level_name}. Logging HPO run to console and {log_file}")
         if effective_log_level <= logging.DEBUG: logger.debug("Debug logging enabled.")
-    except (AttributeError, ValueError, TypeError) as e: # Added TypeError
+    except (AttributeError, ValueError, TypeError) as e:
         logger.warning(f"Could not set log level from config: {e}. Defaulting to INFO.")
         logger.setLevel(logging.INFO)
         # Still try to log to a default file if possible