init

2025-05-02 10:45:06 +02:00
commit 7c9d809a82
29 changed files with 2931 additions and 0 deletions
--- a/forecasting_model/__init__.py
+++ b/forecasting_model/__init__.py
@ -0,0 +1,8 @@
+"""
+Time Series Forecasting Package with LSTM
+
+This package provides a configurable PyTorch-based LSTM model for time series forecasting,
+with support for feature engineering, cross-validation, and evaluation.
+"""
+
+# Package version string.
+__version__ = "0.1.0"
--- a/forecasting_model/data_processing.py
+++ b/forecasting_model/data_processing.py
@ -0,0 +1,67 @@
+from typing import Tuple, Generator, List, Optional
+
+import numpy as np
+import pandas as pd
+import torch
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from torch.utils.data import Dataset, DataLoader
+
+from utils.config_model import (
+    CrossValidationConfig,
+    DataConfig,
+    EvaluationConfig,
+    FeatureConfig,
+    TrainingConfig,
+)
+
+# --- Data Loading ---
+def load_raw_data(config: DataConfig) -> pd.DataFrame:
+    """
+    Load and preprocess raw data from CSV.
+
+    NOTE: Placeholder -- the body is not implemented yet, so calling this
+    currently returns None rather than the annotated DataFrame.
+
+    Args:
+        config: Data settings; DataConfig declares data_path, datetime_col
+            and target_col.
+
+    Returns:
+        Intended to return the loaded data as a DataFrame.
+    """
+    # TODO: Implement CSV loading and datetime parsing
+    pass
+
+# --- Feature Engineering ---
+def engineer_features(df: pd.DataFrame, target_col: str, feature_config: FeatureConfig) -> pd.DataFrame:
+    """
+    Create features from the target column and datetime index.
+
+    NOTE: Placeholder -- the body is not implemented yet, so calling this
+    currently returns None rather than the annotated DataFrame.
+
+    Args:
+        df: Input frame (presumably indexed by datetime -- TODO confirm).
+        target_col: Name of the target column in df.
+        feature_config: Feature settings; FeatureConfig declares lags,
+            rolling_window_sizes, use_time_features and wavelet_transform.
+
+    Returns:
+        Intended to return a DataFrame containing the engineered features.
+    """
+    # TODO: Implement feature engineering (lags, rolling stats, time features, wavelets)
+    pass
+
+# --- Cross Validation ---
+class TimeSeriesCrossValidationSplitter:
+    """
+    Expanding-window train/val/test splitter for time series (skeleton).
+
+    NOTE: split() is still a placeholder and currently returns None.
+    """
+
+    # The config annotation is quoted so that defining this class does not
+    # evaluate the name eagerly. Unquoted, a missing import of
+    # CrossValidationConfig raised NameError as soon as the module was imported.
+    def __init__(self, config: "CrossValidationConfig", n_samples: int):
+        """
+        Store the splitter settings.
+
+        Args:
+            config: Cross-validation settings (a CrossValidationConfig).
+            n_samples: Total sample count available for splitting; stored as-is.
+        """
+        self.config = config
+        self.n_samples = n_samples
+
+    def split(self) -> Generator[Tuple[np.ndarray, np.ndarray, np.ndarray], None, None]:
+        """
+        Generate train/val/test splits using expanding window approach.
+
+        Returns:
+            Intended to yield (train_idx, val_idx, test_idx) index arrays;
+            not implemented yet.
+        """
+        # TODO: Implement expanding window CV splitter
+        pass
+
+# --- Dataset Class ---
+class TimeSeriesDataset(Dataset):
+    """
+    Dataset wrapper around a data array (skeleton).
+
+    NOTE: __len__ and __getitem__ are placeholders that return None, so
+    len(dataset) raises TypeError until they are implemented.
+    """
+
+    def __init__(self, data_array: np.ndarray, sequence_length: int, forecast_horizon: int):
+        """
+        Store the array and the window sizes.
+
+        Args:
+            data_array: Source array; kept by reference, not copied.
+            sequence_length: Stored as-is (presumably the number of input
+                time steps per sample -- TODO confirm).
+            forecast_horizon: Stored as-is (presumably the number of steps
+                to predict -- TODO confirm).
+        """
+        self.data = data_array
+        self.sequence_length = sequence_length
+        self.forecast_horizon = forecast_horizon
+
+    def __len__(self) -> int:
+        """Return the number of samples (not implemented yet)."""
+        # TODO: Implement length calculation
+        pass
+
+    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Return the sample at idx as a tensor pair (not implemented yet)."""
+        # TODO: Implement sequence extraction
+        pass
+
+# --- Data Preparation ---
+def prepare_fold_data_and_loaders(
+    full_df: pd.DataFrame,
+    train_idx: np.ndarray,
+    val_idx: np.ndarray,
+    test_idx: np.ndarray,
+    feature_config: FeatureConfig,
+    train_config: TrainingConfig,
+    eval_config: EvaluationConfig
+) -> Tuple[DataLoader, DataLoader, DataLoader, object, int]:
+    """
+    Prepare data loaders for a single fold.
+
+    NOTE: Placeholder -- the body is not implemented yet, so calling this
+    currently returns None rather than the annotated 5-tuple.
+
+    Args:
+        full_df: Complete data frame the fold is drawn from.
+        train_idx: Index array selecting the training rows.
+        val_idx: Index array selecting the validation rows.
+        test_idx: Index array selecting the test rows.
+        feature_config: Feature settings (FeatureConfig).
+        train_config: Training settings (TrainingConfig).
+        eval_config: Evaluation settings (EvaluationConfig).
+
+    Returns:
+        Intended to return three DataLoaders followed by an object and an
+        int (presumably train/val/test loaders, the fitted target scaler and
+        the input feature count -- TODO confirm).
+    """
+    # TODO: Implement data preparation pipeline
+    pass
--- a/forecasting_model/evaluation.py
+++ b/forecasting_model/evaluation.py
@ -0,0 +1,82 @@
+import numpy as np
+import torch
+from torch.utils.data import DataLoader
+from typing import Dict, Any, Optional
+from utils.config_model import EvaluationConfig
+
+def calculate_mae(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+    """
+    Calculate Mean Absolute Error.
+
+    Args:
+        y_true: Ground-truth values (any array-like of numbers).
+        y_pred: Predicted values with exactly the same shape as y_true.
+
+    Returns:
+        The mean of |y_true - y_pred| over all elements, as a Python float.
+
+    Raises:
+        ValueError: If the shapes differ or the inputs are empty.
+    """
+    actual = np.asarray(y_true, dtype=float)
+    predicted = np.asarray(y_pred, dtype=float)
+    # Reject mismatched shapes explicitly: (N,) against (N, 1) would
+    # otherwise broadcast to (N, N) and silently yield a wrong score.
+    if actual.shape != predicted.shape:
+        raise ValueError(
+            f"Shape mismatch: y_true {actual.shape} vs y_pred {predicted.shape}"
+        )
+    if actual.size == 0:
+        raise ValueError("Cannot compute MAE on empty input")
+    return float(np.mean(np.abs(actual - predicted)))
+
+def calculate_rmse(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+    """
+    Calculate Root Mean Squared Error.
+
+    Args:
+        y_true: Ground-truth values (any array-like of numbers).
+        y_pred: Predicted values with exactly the same shape as y_true.
+
+    Returns:
+        sqrt(mean((y_true - y_pred) ** 2)) over all elements, as a Python float.
+
+    Raises:
+        ValueError: If the shapes differ or the inputs are empty.
+    """
+    actual = np.asarray(y_true, dtype=float)
+    predicted = np.asarray(y_pred, dtype=float)
+    # Reject mismatched shapes explicitly: (N,) against (N, 1) would
+    # otherwise broadcast to (N, N) and silently yield a wrong score.
+    if actual.shape != predicted.shape:
+        raise ValueError(
+            f"Shape mismatch: y_true {actual.shape} vs y_pred {predicted.shape}"
+        )
+    if actual.size == 0:
+        raise ValueError("Cannot compute RMSE on empty input")
+    return float(np.sqrt(np.mean(np.square(actual - predicted))))
+
+def plot_predictions_vs_actual(
+    y_true: np.ndarray,
+    y_pred: np.ndarray,
+    title_suffix: str,
+    filename: str,
+    max_points: Optional[int] = None
+) -> None:
+    """
+    Create line plot of predictions vs actual values.
+
+    NOTE: Placeholder -- nothing is plotted or written yet.
+
+    Args:
+        y_true: Actual values.
+        y_pred: Predicted values.
+        title_suffix: Text intended for the plot title.
+        filename: Intended output path for the figure.
+        max_points: Optional limit, defaults to None (presumably caps the
+            number of plotted points -- TODO confirm).
+    """
+    # TODO: Implement prediction vs actual plot
+    pass
+
+def plot_scatter_predictions(
+    y_true: np.ndarray,
+    y_pred: np.ndarray,
+    title_suffix: str,
+    filename: str
+) -> None:
+    """
+    Create scatter plot of predictions vs actual values.
+
+    NOTE: Placeholder -- nothing is plotted or written yet.
+
+    Args:
+        y_true: Actual values.
+        y_pred: Predicted values.
+        title_suffix: Text intended for the plot title.
+        filename: Intended output path for the figure.
+    """
+    # TODO: Implement scatter plot
+    pass
+
+def plot_residuals_time(
+    residuals: np.ndarray,
+    title_suffix: str,
+    filename: str,
+    max_points: Optional[int] = None
+) -> None:
+    """
+    Create plot of residuals over time.
+
+    NOTE: Placeholder -- nothing is plotted or written yet.
+
+    Args:
+        residuals: Residual values (sign convention not fixed here --
+            TODO confirm whether actual - predicted or the reverse).
+        title_suffix: Text intended for the plot title.
+        filename: Intended output path for the figure.
+        max_points: Optional limit, defaults to None (presumably caps the
+            number of plotted points -- TODO confirm).
+    """
+    # TODO: Implement residuals time plot
+    pass
+
+def plot_residuals_distribution(
+    residuals: np.ndarray,
+    title_suffix: str,
+    filename: str
+) -> None:
+    """
+    Create histogram/KDE of residuals.
+
+    NOTE: Placeholder -- nothing is plotted or written yet.
+
+    Args:
+        residuals: Residual values to summarise.
+        title_suffix: Text intended for the plot title.
+        filename: Intended output path for the figure.
+    """
+    # TODO: Implement residuals distribution plot
+    pass
+
+def evaluate_fold(
+    model: torch.nn.Module,
+    test_loader: DataLoader,
+    loss_fn: torch.nn.Module,
+    device: torch.device,
+    target_scaler: Any,
+    eval_config: EvaluationConfig,
+    fold_num: int
+) -> Dict[str, float]:
+    """
+    Evaluate model on test set and generate plots.
+
+    NOTE: Placeholder -- the body is not implemented yet, so calling this
+    currently returns None rather than the annotated metrics dict.
+
+    Args:
+        model: Model to evaluate.
+        test_loader: Loader providing the test data.
+        loss_fn: Loss module.
+        device: Torch device to run on.
+        target_scaler: Scaler object (presumably used to map predictions
+            back to the original scale -- TODO confirm).
+        eval_config: Evaluation settings; EvaluationConfig declares metrics,
+            eval_batch_size, save_plots and plot_sample_size.
+        fold_num: Number identifying the fold.
+
+    Returns:
+        Intended to return a mapping of metric name to value.
+    """
+    # TODO: Implement full evaluation pipeline
+    pass
--- a/forecasting_model/io/__init__.py
+++ b/forecasting_model/io/__init__.py
@ -0,0 +1,5 @@
+"""
+IO utilities for the forecasting model.
+
+This package contains utilities for data loading, saving, and visualization.
+"""
--- a/forecasting_model/io/plotting.py
+++ b/forecasting_model/io/plotting.py
@ -0,0 +1,75 @@
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
+from typing import Optional
+import logging
+
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+
+def setup_plot_style() -> None:
+    """
+    Set up consistent plotting style.
+
+    NOTE: Placeholder -- no style settings are applied yet.
+    """
+    # TODO: Implement plot style configuration
+    pass
+
+def save_plot(fig: plt.Figure, filename: str) -> None:
+    """
+    Save plot to file with proper error handling.
+
+    NOTE: Placeholder -- nothing is written to disk yet.
+
+    Args:
+        fig: Figure to save.
+        filename: Intended output path.
+    """
+    # TODO: Implement plot saving with error handling
+    pass
+
+def create_time_series_plot(
+    x: np.ndarray,
+    y_true: np.ndarray,
+    y_pred: np.ndarray,
+    title: str,
+    xlabel: str,
+    ylabel: str,
+    max_points: Optional[int] = None
+) -> plt.Figure:
+    """
+    Create a time series plot with actual vs predicted values.
+
+    NOTE: Placeholder -- the body is not implemented yet, so calling this
+    currently returns None rather than a Figure.
+
+    Args:
+        x: Values for the horizontal axis.
+        y_true: Actual values.
+        y_pred: Predicted values.
+        title: Plot title.
+        xlabel: Label for the x axis.
+        ylabel: Label for the y axis.
+        max_points: Optional limit, defaults to None (presumably caps the
+            number of plotted points -- TODO confirm).
+
+    Returns:
+        Intended to return the created Figure.
+    """
+    # TODO: Implement time series plot creation
+    pass
+
+def create_scatter_plot(
+    y_true: np.ndarray,
+    y_pred: np.ndarray,
+    title: str,
+    xlabel: str,
+    ylabel: str
+) -> plt.Figure:
+    """
+    Create a scatter plot of actual vs predicted values.
+
+    NOTE: Placeholder -- the body is not implemented yet, so calling this
+    currently returns None rather than a Figure.
+
+    Args:
+        y_true: Actual values.
+        y_pred: Predicted values.
+        title: Plot title.
+        xlabel: Label for the x axis.
+        ylabel: Label for the y axis.
+
+    Returns:
+        Intended to return the created Figure.
+    """
+    # TODO: Implement scatter plot creation
+    pass
+
+def create_residuals_plot(
+    x: np.ndarray,
+    residuals: np.ndarray,
+    title: str,
+    xlabel: str,
+    ylabel: str,
+    max_points: Optional[int] = None
+) -> plt.Figure:
+    """
+    Create a plot of residuals over time.
+
+    NOTE: Placeholder -- the body is not implemented yet, so calling this
+    currently returns None rather than a Figure.
+
+    Args:
+        x: Values for the horizontal axis.
+        residuals: Residual values to plot.
+        title: Plot title.
+        xlabel: Label for the x axis.
+        ylabel: Label for the y axis.
+        max_points: Optional limit, defaults to None (presumably caps the
+            number of plotted points -- TODO confirm).
+
+    Returns:
+        Intended to return the created Figure.
+    """
+    # TODO: Implement residuals plot creation
+    pass
+
+def create_residuals_distribution_plot(
+    residuals: np.ndarray,
+    title: str,
+    xlabel: str,
+    ylabel: str
+) -> plt.Figure:
+    """
+    Create a distribution plot of residuals.
+
+    NOTE: Placeholder -- the body is not implemented yet, so calling this
+    currently returns None rather than a Figure.
+
+    Args:
+        residuals: Residual values to summarise.
+        title: Plot title.
+        xlabel: Label for the x axis.
+        ylabel: Label for the y axis.
+
+    Returns:
+        Intended to return the created Figure.
+    """
+    # TODO: Implement residuals distribution plot creation
+    pass
--- a/forecasting_model/model.py
+++ b/forecasting_model/model.py
@ -0,0 +1,28 @@
+import torch
+import torch.nn as nn
+from typing import Optional
+from utils.config_model import ModelConfig
+
+class LSTMForecastModel(nn.Module):
+    """
+    LSTM-based forecasting network (skeleton).
+
+    NOTE: No layers are created yet; only the config and the residual-skip
+    flag are stored, and forward() currently returns None.
+    """
+
+    def __init__(self, model_config: ModelConfig):
+        """
+        Keep the model configuration for later layer construction.
+
+        Args:
+            model_config: Model settings; use_residual_skips is read here
+                and mirrored onto the instance.
+        """
+        super().__init__()
+        self.config = model_config
+        self.use_residual_skips = model_config.use_residual_skips
+
+        # TODO: Initialize LSTM layers
+        # TODO: Initialize dropout
+        # TODO: Initialize output layer
+        # TODO: Initialize residual connection layer if needed
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Forward pass through the LSTM network.
+
+        NOTE: Placeholder -- not implemented yet; currently returns None.
+
+        Args:
+            x: Input tensor of shape (batch_size, sequence_length, input_size)
+
+        Returns:
+            Predictions tensor of shape (batch_size, forecast_horizon)
+        """
+        # TODO: Implement forward pass with optional residual connections
+        pass
--- a/forecasting_model/trainer.py
+++ b/forecasting_model/trainer.py
@ -0,0 +1,50 @@
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+from typing import Optional, Dict, Any
+from ..utils.config_model import TrainingConfig
+
+class Trainer:
+    """
+    Bundle a model, its data loaders and training settings (skeleton).
+
+    NOTE: The constructor only stores its arguments; the optimizer, early
+    stopping and all three methods are still TODO and return None.
+    """
+
+    def __init__(
+        self,
+        model: nn.Module,
+        train_loader: DataLoader,
+        val_loader: DataLoader,
+        loss_fn: nn.Module,
+        device: torch.device,
+        config: TrainingConfig,
+        scheduler: Optional[torch.optim.lr_scheduler._LRScheduler] = None,
+        target_scaler: Optional[Any] = None
+    ):
+        """
+        Store the training collaborators on the instance.
+
+        Args:
+            model: Network to train.
+            train_loader: Loader for the training data.
+            val_loader: Loader for the validation data.
+            loss_fn: Loss module.
+            device: Torch device to run on.
+            config: Training settings (TrainingConfig).
+            scheduler: Optional learning-rate scheduler; defaults to None.
+            target_scaler: Optional scaler object; defaults to None
+                (presumably used to report metrics on the original scale --
+                TODO confirm).
+        """
+        self.model = model
+        self.train_loader = train_loader
+        self.val_loader = val_loader
+        self.loss_fn = loss_fn
+        self.device = device
+        self.config = config
+        self.scheduler = scheduler
+        self.target_scaler = target_scaler
+
+        # TODO: Initialize optimizer (Adam)
+        # TODO: Initialize early stopping if configured
+
+    def train_epoch(self) -> Dict[str, float]:
+        """
+        Train for one epoch.
+
+        Returns:
+            Intended to return a mapping of metric name to value; not
+            implemented yet.
+        """
+        # TODO: Implement training loop for one epoch
+        pass
+
+    def evaluate(self, loader: DataLoader) -> Dict[str, float]:
+        """
+        Evaluate model on given data loader.
+
+        Args:
+            loader: Loader providing the data to evaluate on.
+
+        Returns:
+            Intended to return a mapping of metric name to value; not
+            implemented yet.
+        """
+        # TODO: Implement evaluation with metrics on original scale
+        pass
+
+    def train(self) -> Dict[str, Any]:
+        """
+        Main training loop with validation and early stopping.
+
+        Returns:
+            Intended to return a dict describing the training run; not
+            implemented yet.
+        """
+        # TODO: Implement full training loop with validation
+        pass
--- a/forecasting_model/utils/__init__.py
+++ b/forecasting_model/utils/__init__.py
@ -0,0 +1,5 @@
+"""
+Utility functions and classes for the forecasting model.
+
+This package contains configuration models, helper functions, and other utilities.
+"""
--- a/forecasting_model/utils/config_model.py
+++ b/forecasting_model/utils/config_model.py
@ -0,0 +1,62 @@
+from pydantic import BaseModel, Field
+from typing import Optional, List, Union
+from enum import Enum
+
+class WaveletTransformConfig(BaseModel):
+    """
+    Options for the optional wavelet transform step.
+
+    Every field has a default, so the model can be built with no arguments.
+    """
+    apply: bool = False  # Off by default
+    target_or_feature: str = "target"  # presumably selects what is transformed -- TODO confirm accepted values
+    wavelet_type: str = "db4"  # presumably a wavelet family name -- TODO confirm
+    level: int = 3  # presumably the decomposition depth -- TODO confirm
+    use_coeffs: List[str] = ["approx", "detail_1"]  # Coefficient labels; naming scheme TODO confirm
+
+class DataConfig(BaseModel):
+    """
+    Location and column names of the raw data.
+
+    All three fields are required (no defaults).
+    """
+    data_path: str  # Path to the data file
+    datetime_col: str  # Name of the datetime column
+    target_col: str  # Name of the target column
+
+class FeatureConfig(BaseModel):
+    """
+    Feature-engineering and windowing settings.
+
+    scaling_method and wavelet_transform are optional and default to None;
+    every other field is required.
+    """
+    sequence_length: int
+    forecast_horizon: int
+    lags: List[int]
+    rolling_window_sizes: List[int]
+    use_time_features: bool
+    scaling_method: Optional[str] = None  # Accepted names are not defined here -- TODO confirm
+    wavelet_transform: Optional[WaveletTransformConfig] = None  # Nested wavelet options, if any
+
+class ModelConfig(BaseModel):
+    """
+    Architecture settings for the forecasting model.
+
+    input_size and output_size default to None and are expected to be
+    filled in later rather than supplied up front.
+    """
+    input_size: Optional[int] = None  # Will be calculated
+    hidden_size: int
+    num_layers: int
+    dropout: float
+    use_residual_skips: bool = False  # Residual skips are off by default
+    output_size: Optional[int] = None  # Will be calculated
+
+class TrainingConfig(BaseModel):
+    """
+    Training-loop settings.
+
+    Early stopping and scheduler fields are optional and default to None;
+    every other field is required.
+    """
+    batch_size: int
+    epochs: int
+    learning_rate: float
+    loss_function: str  # Accepted names are not defined here -- TODO confirm
+    device: str  # Device identifier string -- accepted values TODO confirm
+    early_stopping_patience: Optional[int] = None
+    scheduler_step_size: Optional[int] = None
+    scheduler_gamma: Optional[float] = None
+
+class CrossValidationConfig(BaseModel):
+    """
+    Cross-validation split settings.
+
+    initial_train_size is optional and defaults to None; the other fields
+    are required.
+    """
+    n_splits: int
+    test_size_fraction: float
+    val_size_fraction: float
+    # Accepts an int or a float (presumably an absolute count vs. a fraction -- TODO confirm)
+    initial_train_size: Optional[Union[int, float]] = None
+
+class EvaluationConfig(BaseModel):
+    """
+    Evaluation and plotting settings.
+
+    All fields are required (no defaults).
+    """
+    metrics: List[str]  # Metric names; accepted values are not defined here -- TODO confirm
+    eval_batch_size: int
+    save_plots: bool
+    plot_sample_size: int
+
+class MainConfig(BaseModel):
+    """
+    Top-level configuration aggregating every section.
+
+    All six sections are required.
+    """
+    data: DataConfig
+    features: FeatureConfig
+    model: ModelConfig
+    training: TrainingConfig
+    cross_validation: CrossValidationConfig
+    evaluation: EvaluationConfig