This commit is contained in:
2025-05-02 10:45:06 +02:00
commit 7c9d809a82
29 changed files with 2931 additions and 0 deletions

View File

@ -0,0 +1,8 @@
"""
Time Series Forecasting Module with LSTM
This module provides a configurable PyTorch-based LSTM model for time series forecasting,
with support for feature engineering, cross-validation, and evaluation.
"""
# Package version string.
__version__ = "0.1.0"

View File

@ -0,0 +1,67 @@
from typing import Generator, List, Optional, Tuple

import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from torch.utils.data import DataLoader, Dataset

from utils.config_model import (
    CrossValidationConfig,
    DataConfig,
    EvaluationConfig,
    FeatureConfig,
    TrainingConfig,
)
# --- Data Loading ---
def load_raw_data(config: DataConfig) -> pd.DataFrame:
    """
    Load the raw CSV and return it indexed by a sorted datetime index.

    Args:
        config: Data settings; ``data_path``, ``datetime_col`` and
            ``target_col`` are read from it.

    Returns:
        DataFrame holding every CSV column except ``datetime_col``, which is
        parsed with ``pd.to_datetime`` and used as the (ascending) index.

    Raises:
        ValueError: If ``datetime_col`` or ``target_col`` is not a column of
            the CSV. ``pd.to_datetime`` also raises a ``ValueError`` subclass
            for values it cannot parse.
    """
    df = pd.read_csv(config.data_path)

    # Fail early with a clear message instead of a KeyError further downstream.
    missing = [
        col
        for col in (config.datetime_col, config.target_col)
        if col not in df.columns
    ]
    if missing:
        raise ValueError(
            f"Column(s) {missing} not found in '{config.data_path}'. "
            f"Available columns: {list(df.columns)}"
        )

    df[config.datetime_col] = pd.to_datetime(df[config.datetime_col])
    # Sort so that downstream windowing sees rows in chronological order even
    # when the file itself is unordered.
    return df.set_index(config.datetime_col).sort_index()
# --- Feature Engineering ---
def engineer_features(df: pd.DataFrame, target_col: str, feature_config: FeatureConfig) -> pd.DataFrame:
    """
    Create features from the target column and datetime index.

    NOTE: Not implemented yet. The body is a bare ``pass``, so the function
    currently returns ``None`` rather than the annotated ``pd.DataFrame``.

    Args:
        df: Input frame (presumably datetime-indexed, per the summary above --
            confirm against the caller).
        target_col: Name of the target column.
        feature_config: Feature settings. Per the TODO below it is expected to
            drive lags, rolling statistics, time features and wavelets.

    Returns:
        Intended: a DataFrame with the engineered features. Currently ``None``.
    """
    # TODO: Implement feature engineering (lags, rolling stats, time features, wavelets)
    pass
# --- Cross Validation ---
class TimeSeriesCrossValidationSplitter:
    """
    Holder for cross-validation settings and the number of samples to split.

    NOTE: ``split`` is not implemented yet, so no folds can be produced.
    """

    def __init__(self, config: CrossValidationConfig, n_samples: int):
        """
        Store the inputs unchanged; no validation or computation happens here.

        Args:
            config: Cross-validation settings, kept as ``self.config``.
            n_samples: Sample count, kept as ``self.n_samples``.
        """
        self.config = config
        self.n_samples = n_samples

    def split(self) -> Generator[Tuple[np.ndarray, np.ndarray, np.ndarray], None, None]:
        """
        Generate train/val/test splits using expanding window approach.

        NOTE: Not implemented yet. The body contains no ``yield``, so this is
        a plain method returning ``None``, not a generator; iterating over
        the result raises ``TypeError``.

        Returns:
            Intended: a generator of 3-tuples of arrays (presumably train,
            validation and test indices, in that order -- TODO confirm).
        """
        # TODO: Implement expanding window CV splitter
        pass
# --- Dataset Class ---
class TimeSeriesDataset(Dataset):
    """
    Sliding-window dataset over a 2-D array of shape (n_samples, n_features).

    Sample ``idx`` pairs the input window ``data[idx : idx + sequence_length]``
    (all columns) with the following ``forecast_horizon`` values of the target
    column.

    NOTE(review): the position of the target column is not defined anywhere
    visible; it defaults to column 0 here -- confirm against the code that
    builds ``data_array``.
    """

    def __init__(
        self,
        data_array: np.ndarray,
        sequence_length: int,
        forecast_horizon: int,
        target_col_index: int = 0,
    ):
        """
        Args:
            data_array: Numeric array; a 1-D array is treated as one feature.
            sequence_length: Number of time steps in each input window (>= 1).
            forecast_horizon: Number of future steps in each target (>= 1).
            target_col_index: Column of ``data_array`` used as the target.

        Raises:
            ValueError: On non-positive lengths, an array that is not 1-D/2-D,
                or a target column index outside the array.
        """
        if sequence_length < 1 or forecast_horizon < 1:
            raise ValueError(
                "sequence_length and forecast_horizon must both be >= 1, got "
                f"{sequence_length} and {forecast_horizon}"
            )

        # float32 is the default dtype of torch layers; convert once up front
        # rather than on every __getitem__ call.
        data = np.asarray(data_array, dtype=np.float32)
        if data.ndim == 1:
            data = data.reshape(-1, 1)
        if data.ndim != 2:
            raise ValueError(
                f"data_array must be 1-D or 2-D, got {data.ndim} dimensions"
            )
        n_features = data.shape[1]
        if not -n_features <= target_col_index < n_features:
            raise ValueError(
                f"target_col_index {target_col_index} is out of range for "
                f"{n_features} feature column(s)"
            )

        self.data = data
        self.sequence_length = sequence_length
        self.forecast_horizon = forecast_horizon
        self.target_col_index = target_col_index

    def __len__(self) -> int:
        """Return the number of complete (window, target) pairs; 0 if too short."""
        usable = self.data.shape[0] - self.sequence_length - self.forecast_horizon + 1
        return max(0, usable)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Return sample ``idx`` as ``(x, y)``.

        Returns:
            ``x`` of shape (sequence_length, n_features) and ``y`` of shape
            (forecast_horizon,), both float32.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_items = len(self)
        if idx < 0:
            idx += n_items
        # IndexError (not a short slice) is required so that plain iteration
        # over the dataset terminates and bad indices never yield ragged samples.
        if not 0 <= idx < n_items:
            raise IndexError(f"index {idx} out of range for dataset of length {n_items}")

        window_end = idx + self.sequence_length
        target_end = window_end + self.forecast_horizon
        # torch.tensor copies, so mutating a sample cannot corrupt self.data.
        x = torch.tensor(self.data[idx:window_end])
        y = torch.tensor(self.data[window_end:target_end, self.target_col_index])
        return x, y
# --- Data Preparation ---
def prepare_fold_data_and_loaders(
    full_df: pd.DataFrame,
    train_idx: np.ndarray,
    val_idx: np.ndarray,
    test_idx: np.ndarray,
    feature_config: FeatureConfig,
    train_config: TrainingConfig,
    eval_config: EvaluationConfig
) -> Tuple[DataLoader, DataLoader, DataLoader, object, int]:
    """
    Prepare data loaders for a single fold.

    NOTE: Not implemented yet. The body is a bare ``pass``, so the function
    currently returns ``None``; unpacking the result raises ``TypeError``.

    Args:
        full_df: Complete data frame for all folds.
        train_idx: Indices of the training part of this fold.
        val_idx: Indices of the validation part of this fold.
        test_idx: Indices of the test part of this fold.
        feature_config: Feature settings.
        train_config: Training settings.
        eval_config: Evaluation settings.

    Returns:
        Intended: a 5-tuple of three ``DataLoader`` objects, one untyped
        object and an int. Presumably (train loader, val loader, test loader,
        fitted target scaler, model input size) -- TODO confirm when
        implementing.
    """
    # TODO: Implement data preparation pipeline
    pass

View File

@ -0,0 +1,82 @@
import numpy as np
import torch
from torch.utils.data import DataLoader
from typing import Dict, Any, Optional
from utils.config_model import EvaluationConfig
def calculate_mae(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Calculate Mean Absolute Error.

    Args:
        y_true: Ground-truth values (any array-like of numbers).
        y_pred: Predicted values with exactly the same shape as ``y_true``.

    Returns:
        Mean of ``|y_true - y_pred|`` over all elements, as a Python float.

    Raises:
        ValueError: If the shapes differ or the inputs are empty.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Reject mismatched shapes instead of broadcasting: (n,) against (n, 1)
    # would silently expand to an (n, n) error matrix and give a wrong score.
    if actual.shape != predicted.shape:
        raise ValueError(
            f"Shape mismatch: y_true {actual.shape} vs y_pred {predicted.shape}"
        )
    if actual.size == 0:
        raise ValueError("Cannot compute MAE of empty arrays")

    return float(np.mean(np.abs(actual - predicted)))
def calculate_rmse(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Calculate Root Mean Squared Error.

    Args:
        y_true: Ground-truth values (any array-like of numbers).
        y_pred: Predicted values with exactly the same shape as ``y_true``.

    Returns:
        Square root of the mean of ``(y_true - y_pred) ** 2`` over all
        elements, as a Python float.

    Raises:
        ValueError: If the shapes differ or the inputs are empty.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Reject mismatched shapes instead of broadcasting: (n,) against (n, 1)
    # would silently expand to an (n, n) error matrix and give a wrong score.
    if actual.shape != predicted.shape:
        raise ValueError(
            f"Shape mismatch: y_true {actual.shape} vs y_pred {predicted.shape}"
        )
    if actual.size == 0:
        raise ValueError("Cannot compute RMSE of empty arrays")

    return float(np.sqrt(np.mean(np.square(actual - predicted))))
def plot_predictions_vs_actual(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    title_suffix: str,
    filename: str,
    max_points: Optional[int] = None
) -> None:
    """
    Create line plot of predictions vs actual values.

    NOTE: Not implemented yet. The body is a bare ``pass``, so calling this
    produces no plot and writes no file.

    Args:
        y_true: Actual values.
        y_pred: Predicted values.
        title_suffix: Text for the plot title (presumably appended to a fixed
            prefix -- TODO confirm).
        filename: Presumably the output path of the saved figure.
        max_points: Optional limit; presumably caps how many points are
            drawn, with ``None`` meaning no cap -- TODO confirm.
    """
    # TODO: Implement prediction vs actual plot
    pass
def plot_scatter_predictions(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    title_suffix: str,
    filename: str
) -> None:
    """
    Create scatter plot of predictions vs actual values.

    NOTE: Not implemented yet. The body is a bare ``pass``, so calling this
    produces no plot and writes no file.

    Args:
        y_true: Actual values.
        y_pred: Predicted values.
        title_suffix: Text for the plot title (presumably appended to a fixed
            prefix -- TODO confirm).
        filename: Presumably the output path of the saved figure.
    """
    # TODO: Implement scatter plot
    pass
def plot_residuals_time(
    residuals: np.ndarray,
    title_suffix: str,
    filename: str,
    max_points: Optional[int] = None
) -> None:
    """
    Create plot of residuals over time.

    NOTE: Not implemented yet. The body is a bare ``pass``, so calling this
    produces no plot and writes no file.

    Args:
        residuals: Residual values (sign convention, e.g. actual minus
            predicted, is not fixed anywhere visible -- TODO confirm).
        title_suffix: Text for the plot title (presumably appended to a fixed
            prefix -- TODO confirm).
        filename: Presumably the output path of the saved figure.
        max_points: Optional limit; presumably caps how many points are
            drawn, with ``None`` meaning no cap -- TODO confirm.
    """
    # TODO: Implement residuals time plot
    pass
def plot_residuals_distribution(
    residuals: np.ndarray,
    title_suffix: str,
    filename: str
) -> None:
    """
    Create histogram/KDE of residuals.

    NOTE: Not implemented yet. The body is a bare ``pass``, so calling this
    produces no plot and writes no file.

    Args:
        residuals: Residual values (sign convention is not fixed anywhere
            visible -- TODO confirm).
        title_suffix: Text for the plot title (presumably appended to a fixed
            prefix -- TODO confirm).
        filename: Presumably the output path of the saved figure.
    """
    # TODO: Implement residuals distribution plot
    pass
def evaluate_fold(
    model: torch.nn.Module,
    test_loader: DataLoader,
    loss_fn: torch.nn.Module,
    device: torch.device,
    target_scaler: Any,
    eval_config: EvaluationConfig,
    fold_num: int
) -> Dict[str, float]:
    """
    Evaluate model on test set and generate plots.

    NOTE: Not implemented yet. The body is a bare ``pass``, so the function
    currently returns ``None`` rather than the annotated metrics dict.

    Args:
        model: Model to evaluate.
        test_loader: Loader over the test data of this fold.
        loss_fn: Loss module.
        device: Device to run on.
        target_scaler: Untyped scaler object; presumably used to map
            predictions back to the original scale -- TODO confirm.
        eval_config: Evaluation settings.
        fold_num: Number of the fold (presumably used for labelling output
            -- TODO confirm whether 0- or 1-based).

    Returns:
        Intended: mapping from metric name to value. Currently ``None``.
    """
    # TODO: Implement full evaluation pipeline
    pass

View File

@ -0,0 +1,5 @@
"""
IO utilities for the forecasting model.
This package contains utilities for data loading, saving, and visualization.
"""

View File

@ -0,0 +1,75 @@
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from typing import Optional
import logging
logger = logging.getLogger(__name__)
def setup_plot_style() -> None:
    """
    Set up consistent plotting style.

    NOTE: Not implemented yet. The body is a bare ``pass``, so calling this
    changes no matplotlib or seaborn settings.
    """
    # TODO: Implement plot style configuration
    pass
def save_plot(fig: plt.Figure, filename: str) -> None:
    """
    Save plot to file with proper error handling.

    NOTE: Not implemented yet. The body is a bare ``pass``, so nothing is
    written and ``fig`` is left untouched (in particular it is not closed).

    Args:
        fig: Figure to save.
        filename: Presumably the destination path -- TODO confirm.
    """
    # TODO: Implement plot saving with error handling
    pass
def create_time_series_plot(
    x: np.ndarray,
    y_true: np.ndarray,
    y_pred: np.ndarray,
    title: str,
    xlabel: str,
    ylabel: str,
    max_points: Optional[int] = None
) -> plt.Figure:
    """
    Create a time series plot with actual vs predicted values.

    NOTE: Not implemented yet. The body is a bare ``pass``, so the function
    currently returns ``None`` rather than the annotated ``plt.Figure``.

    Args:
        x: Values for the horizontal axis.
        y_true: Actual values.
        y_pred: Predicted values.
        title: Plot title.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
        max_points: Optional limit; presumably caps how many points are
            drawn, with ``None`` meaning no cap -- TODO confirm.

    Returns:
        Intended: the created figure. Currently ``None``.
    """
    # TODO: Implement time series plot creation
    pass
def create_scatter_plot(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    title: str,
    xlabel: str,
    ylabel: str
) -> plt.Figure:
    """
    Create a scatter plot of actual vs predicted values.

    NOTE: Not implemented yet. The body is a bare ``pass``, so the function
    currently returns ``None`` rather than the annotated ``plt.Figure``.

    Args:
        y_true: Actual values.
        y_pred: Predicted values.
        title: Plot title.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.

    Returns:
        Intended: the created figure. Currently ``None``.
    """
    # TODO: Implement scatter plot creation
    pass
def create_residuals_plot(
    x: np.ndarray,
    residuals: np.ndarray,
    title: str,
    xlabel: str,
    ylabel: str,
    max_points: Optional[int] = None
) -> plt.Figure:
    """
    Create a plot of residuals over time.

    NOTE: Not implemented yet. The body is a bare ``pass``, so the function
    currently returns ``None`` rather than the annotated ``plt.Figure``.

    Args:
        x: Values for the horizontal axis.
        residuals: Residual values (sign convention is not fixed anywhere
            visible -- TODO confirm).
        title: Plot title.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
        max_points: Optional limit; presumably caps how many points are
            drawn, with ``None`` meaning no cap -- TODO confirm.

    Returns:
        Intended: the created figure. Currently ``None``.
    """
    # TODO: Implement residuals plot creation
    pass
def create_residuals_distribution_plot(
    residuals: np.ndarray,
    title: str,
    xlabel: str,
    ylabel: str
) -> plt.Figure:
    """
    Create a distribution plot of residuals.

    NOTE: Not implemented yet. The body is a bare ``pass``, so the function
    currently returns ``None`` rather than the annotated ``plt.Figure``.

    Args:
        residuals: Residual values (sign convention is not fixed anywhere
            visible -- TODO confirm).
        title: Plot title.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.

    Returns:
        Intended: the created figure. Currently ``None``.
    """
    # TODO: Implement residuals distribution plot creation
    pass

View File

@ -0,0 +1,28 @@
import torch
import torch.nn as nn
from typing import Optional
from utils.config_model import ModelConfig
class LSTMForecastModel(nn.Module):
    """
    Skeleton of the LSTM forecasting network.

    NOTE: Only the configuration is stored so far. No LSTM, dropout, output or
    residual layers are created (see the TODOs in ``__init__``), and
    ``forward`` is an unimplemented stub.
    """

    def __init__(self, model_config: ModelConfig):
        """
        Store the configuration on the module.

        Args:
            model_config: Model settings; kept as ``self.config``. Its
                ``use_residual_skips`` flag is also copied to
                ``self.use_residual_skips``.
        """
        super().__init__()
        self.config = model_config
        self.use_residual_skips = model_config.use_residual_skips
        # TODO: Initialize LSTM layers
        # TODO: Initialize dropout
        # TODO: Initialize output layer
        # TODO: Initialize residual connection layer if needed

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass through the LSTM network.

        NOTE: Not implemented yet. The body is a bare ``pass``, so calling the
        model currently returns ``None``.

        Args:
            x: Input tensor of shape (batch_size, sequence_length, input_size)
        Returns:
            Predictions tensor of shape (batch_size, forecast_horizon)
        """
        # TODO: Implement forward pass with optional residual connections
        pass

View File

@ -0,0 +1,50 @@
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from typing import Optional, Dict, Any
from ..utils.config_model import TrainingConfig
class Trainer:
    """
    Skeleton of the training driver.

    NOTE: The constructor only stores its arguments. The optimizer and early
    stopping are not set up yet (see the TODOs), and ``train_epoch``,
    ``evaluate`` and ``train`` are unimplemented stubs that return ``None``.
    """

    def __init__(
        self,
        model: nn.Module,
        train_loader: DataLoader,
        val_loader: DataLoader,
        loss_fn: nn.Module,
        device: torch.device,
        config: TrainingConfig,
        scheduler: Optional[torch.optim.lr_scheduler._LRScheduler] = None,
        target_scaler: Optional[Any] = None
    ):
        """
        Store every argument on the instance under the same name.

        Args:
            model: Network to train.
            train_loader: Loader over the training data.
            val_loader: Loader over the validation data.
            loss_fn: Loss module.
            device: Device to run on. The model is not moved to it here.
            config: Training settings.
            scheduler: Optional learning-rate scheduler.
            target_scaler: Optional untyped scaler object; presumably used to
                report metrics on the original scale -- TODO confirm.

        NOTE(review): a scheduler is normally constructed around an existing
        optimizer, yet the optimizer is planned to be created in here (TODO
        below) -- confirm how the two are meant to be wired together.
        """
        self.model = model
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.loss_fn = loss_fn
        self.device = device
        self.config = config
        self.scheduler = scheduler
        self.target_scaler = target_scaler
        # TODO: Initialize optimizer (Adam)
        # TODO: Initialize early stopping if configured

    def train_epoch(self) -> Dict[str, float]:
        """
        Train for one epoch.

        NOTE: Not implemented yet; currently returns ``None`` instead of the
        annotated dict.
        """
        # TODO: Implement training loop for one epoch
        pass

    def evaluate(self, loader: DataLoader) -> Dict[str, float]:
        """
        Evaluate model on given data loader.

        NOTE: Not implemented yet; currently returns ``None`` instead of the
        annotated dict.

        Args:
            loader: Loader over the data to evaluate on.
        """
        # TODO: Implement evaluation with metrics on original scale
        pass

    def train(self) -> Dict[str, Any]:
        """
        Main training loop with validation and early stopping.

        NOTE: Not implemented yet; currently returns ``None`` instead of the
        annotated dict.
        """
        # TODO: Implement full training loop with validation
        pass

View File

@ -0,0 +1,5 @@
"""
Utility functions and classes for the forecasting model.
This package contains configuration models, helper functions, and other utilities.
"""

View File

@ -0,0 +1,62 @@
from pydantic import BaseModel, Field
from typing import Optional, List, Union
from enum import Enum
class WaveletTransformConfig(BaseModel):
    """
    Settings for an optional wavelet transform.

    NOTE(review): field meanings below are inferred from names and defaults
    only -- confirm once the feature engineering that reads them exists.
    """
    # Whether the transform is enabled; disabled by default.
    apply: bool = False
    # Presumably selects whether the target or a feature series is
    # transformed; the allowed values are not constrained here -- TODO confirm.
    target_or_feature: str = "target"
    # Wavelet identifier; "db4" looks like a PyWavelets-style name -- TODO confirm.
    wavelet_type: str = "db4"
    # Presumably the decomposition level.
    level: int = 3
    # Presumably the names of the coefficient sets to keep.
    use_coeffs: List[str] = ["approx", "detail_1"]
class DataConfig(BaseModel):
    """
    Location and column names of the raw data. All fields are required.
    """
    # Path of the data file (a CSV, according to the loader's docstring).
    data_path: str
    # Name of the column holding the timestamps.
    datetime_col: str
    # Name of the column to forecast.
    target_col: str
class FeatureConfig(BaseModel):
    """
    Feature-engineering and windowing settings.

    The first five fields are required; the last two default to ``None``.
    """
    # Number of time steps in each model input window.
    sequence_length: int
    # Number of future steps to predict.
    forecast_horizon: int
    # Lag offsets (presumably in rows/time steps -- TODO confirm unit).
    lags: List[int]
    # Window sizes for rolling statistics (same unit caveat as ``lags``).
    rolling_window_sizes: List[int]
    # Whether features derived from the datetime index are added.
    use_time_features: bool
    # Name of the scaling method; None presumably means no scaling. The
    # accepted names are not constrained here -- TODO confirm.
    scaling_method: Optional[str] = None
    # Optional wavelet settings; None presumably means no wavelet step.
    wavelet_transform: Optional[WaveletTransformConfig] = None
class ModelConfig(BaseModel):
    """
    Architecture settings for the LSTM model.

    ``input_size`` and ``output_size`` default to ``None`` and are meant to
    be filled in later (see the inline comments); the code doing so is not
    visible here.
    """
    input_size: Optional[int] = None # Will be calculated
    # Presumably the LSTM hidden state size.
    hidden_size: int
    # Presumably the number of stacked LSTM layers.
    num_layers: int
    # Dropout value; no range is enforced here (presumably a probability in
    # [0, 1] -- TODO confirm / consider validating).
    dropout: float
    # Whether residual skip connections are used; off by default.
    use_residual_skips: bool = False
    output_size: Optional[int] = None # Will be calculated
class TrainingConfig(BaseModel):
    """
    Training-loop settings.

    The first five fields are required; early stopping and scheduler fields
    default to ``None``.
    """
    # Batch size for training.
    batch_size: int
    # Number of training epochs (presumably an upper bound when early
    # stopping is enabled -- TODO confirm).
    epochs: int
    # Optimizer learning rate.
    learning_rate: float
    # Name of the loss function; accepted names are not constrained here.
    loss_function: str
    # Device name as a string (presumably something torch.device accepts).
    device: str
    # Early-stopping patience; None presumably disables early stopping.
    early_stopping_patience: Optional[int] = None
    # Scheduler step size; None presumably means no scheduler.
    scheduler_step_size: Optional[int] = None
    # Scheduler decay factor; presumably used together with the step size.
    scheduler_gamma: Optional[float] = None
class CrossValidationConfig(BaseModel):
    """
    Cross-validation settings.

    NOTE(review): what the fractions are relative to (whole series vs. each
    fold) is not defined anywhere visible -- confirm when the splitter is
    implemented.
    """
    # Number of folds.
    n_splits: int
    # Size of the test part, as a fraction.
    test_size_fraction: float
    # Size of the validation part, as a fraction.
    val_size_fraction: float
    # Size of the first training window; accepts int or float (presumably an
    # absolute count vs. a fraction -- TODO confirm). Defaults to None.
    initial_train_size: Optional[Union[int, float]] = None
class EvaluationConfig(BaseModel):
    """
    Evaluation and plotting settings. All fields are required.
    """
    # Names of the metrics to compute; accepted names are not constrained here.
    metrics: List[str]
    # Batch size used during evaluation.
    eval_batch_size: int
    # Whether plots are saved.
    save_plots: bool
    # Presumably the maximum number of points shown per plot -- TODO confirm.
    plot_sample_size: int
class MainConfig(BaseModel):
    """
    Top-level configuration bundling one instance of each section config.

    All six sections are required.
    """
    # Raw data location and column names.
    data: DataConfig
    # Feature-engineering and windowing settings.
    features: FeatureConfig
    # Model architecture settings.
    model: ModelConfig
    # Training-loop settings.
    training: TrainingConfig
    # Cross-validation settings.
    cross_validation: CrossValidationConfig
    # Evaluation and plotting settings.
    evaluation: EvaluationConfig