init
This commit is contained in:
8
forecasting_model/__init__.py
Normal file
8
forecasting_model/__init__.py
Normal file
@ -0,0 +1,8 @@
|
||||
"""
|
||||
Time Series Forecasting Module with LSTM
|
||||
|
||||
This module provides a configurable PyTorch-based LSTM model for time series forecasting,
|
||||
with support for feature engineering, cross-validation, and evaluation.
|
||||
"""
|
||||
|
||||
# Version string of the forecasting_model package.
__version__ = "0.1.0"
|
67
forecasting_model/data_processing.py
Normal file
67
forecasting_model/data_processing.py
Normal file
@ -0,0 +1,67 @@
|
||||
from typing import Tuple, Generator, List, Optional

import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch.utils.data import Dataset, DataLoader

from utils.config_model import (
    CrossValidationConfig,
    DataConfig,
    FeatureConfig,
    TrainingConfig,
    EvaluationConfig,
)
|
||||
|
||||
# --- Data Loading ---
|
||||
def load_raw_data(config: DataConfig) -> pd.DataFrame:
    """
    Load and preprocess the raw CSV data described by ``config``.

    Placeholder: no loading logic exists yet, so this currently returns
    ``None`` instead of a DataFrame.

    Args:
        config: Data settings (a ``DataConfig`` instance).

    Returns:
        Intended to be a ``pd.DataFrame``; ``None`` until implemented.
    """
    # TODO: Implement CSV loading and datetime parsing
    return None
|
||||
|
||||
# --- Feature Engineering ---
|
||||
def engineer_features(
    df: pd.DataFrame,
    target_col: str,
    feature_config: FeatureConfig,
) -> pd.DataFrame:
    """
    Derive model features from the target column and the datetime index.

    Placeholder: nothing is computed yet, so this currently returns ``None``
    instead of a DataFrame.

    Args:
        df: Input frame.
        target_col: Name of the target column in ``df``.
        feature_config: Feature settings (a ``FeatureConfig`` instance).

    Returns:
        Intended to be a ``pd.DataFrame``; ``None`` until implemented.
    """
    # TODO: Implement feature engineering (lags, rolling stats, time features, wavelets)
    return None
|
||||
|
||||
# --- Cross Validation ---
|
||||
class TimeSeriesCrossValidationSplitter:
    """
    Holder for the cross-validation settings and the number of samples to split.

    The splitting logic itself is not implemented yet; ``split`` is a
    placeholder that returns ``None``.
    """

    # The annotation is a string (forward reference) on purpose: this module's
    # import line does not bring ``CrossValidationConfig`` into scope, and an
    # unquoted annotation is evaluated when the ``def`` executes, which made
    # importing the module fail with NameError.
    def __init__(self, config: "CrossValidationConfig", n_samples: int):
        """
        Store the splitter inputs.

        Args:
            config: Cross-validation settings (a ``CrossValidationConfig``).
            n_samples: Total number of samples available for splitting.
        """
        self.config = config
        self.n_samples = n_samples

    def split(self) -> Generator[Tuple[np.ndarray, np.ndarray, np.ndarray], None, None]:
        """
        Generate train/val/test splits using expanding window approach.

        Placeholder: currently returns ``None`` rather than a generator, so
        iterating over the result will fail until this is implemented.
        """
        # TODO: Implement expanding window CV splitter
        pass
|
||||
|
||||
# --- Dataset Class ---
|
||||
class TimeSeriesDataset(Dataset):
    """
    Map-style dataset of sliding windows over a time-ordered array.

    Sample ``i`` pairs the ``sequence_length`` rows starting at row ``i`` (all
    columns) with the ``forecast_horizon`` values of the target column that
    immediately follow that window.

    NOTE(review): the target column defaults to index 0 because nothing in the
    visible code states where the target lives in ``data_array``. Confirm this
    once the fold-preparation pipeline exists, and pass ``target_col_idx``
    explicitly if it differs.
    """

    def __init__(
        self,
        data_array: np.ndarray,
        sequence_length: int,
        forecast_horizon: int,
        target_col_idx: int = 0,
    ):
        """
        Args:
            data_array: Array of shape (n_rows, n_columns); a 1-D array is
                treated as a single column.
            sequence_length: Number of consecutive rows in each input window.
            forecast_horizon: Number of target values following each window.
            target_col_idx: Column of ``data_array`` used to build the targets.

        Raises:
            ValueError: If a window size is not positive, the array is not
                1-D/2-D, or ``target_col_idx`` is out of range.
        """
        if sequence_length < 1 or forecast_horizon < 1:
            raise ValueError(
                "sequence_length and forecast_horizon must both be >= 1"
            )

        values = np.asarray(data_array)
        if values.ndim == 1:
            values = values.reshape(-1, 1)
        elif values.ndim != 2:
            raise ValueError(
                f"data_array must be 1-D or 2-D, got {values.ndim} dimensions"
            )

        n_columns = values.shape[1]
        if not -n_columns <= target_col_idx < n_columns:
            raise ValueError(
                f"target_col_idx {target_col_idx} is out of range for "
                f"{n_columns} column(s)"
            )

        self.data = values
        self.sequence_length = sequence_length
        self.forecast_horizon = forecast_horizon
        self.target_col_idx = target_col_idx

    def __len__(self) -> int:
        """Return the number of complete (window, target) pairs; 0 if the array is too short."""
        usable = self.data.shape[0] - self.sequence_length - self.forecast_horizon + 1
        return max(0, usable)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Return sample ``idx`` as ``(inputs, targets)`` float32 tensors.

        ``inputs`` has shape (sequence_length, n_columns) and ``targets`` has
        shape (forecast_horizon,).

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        if idx < 0:
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError("TimeSeriesDataset index out of range")

        window_end = idx + self.sequence_length
        target_end = window_end + self.forecast_horizon
        # torch.tensor copies, so samples never alias the backing array.
        inputs = torch.tensor(self.data[idx:window_end], dtype=torch.float32)
        targets = torch.tensor(
            self.data[window_end:target_end, self.target_col_idx],
            dtype=torch.float32,
        )
        return inputs, targets
|
||||
|
||||
# --- Data Preparation ---
|
||||
def prepare_fold_data_and_loaders(
    full_df: pd.DataFrame,
    train_idx: np.ndarray,
    val_idx: np.ndarray,
    test_idx: np.ndarray,
    feature_config: FeatureConfig,
    train_config: TrainingConfig,
    eval_config: EvaluationConfig,
) -> Tuple[DataLoader, DataLoader, DataLoader, object, int]:
    """
    Build the data loaders for one cross-validation fold.

    Placeholder: the pipeline is not implemented, so this currently returns
    ``None`` instead of the annotated 5-tuple.

    Args:
        full_df: Complete data frame the fold is drawn from.
        train_idx: Indices of the training portion.
        val_idx: Indices of the validation portion.
        test_idx: Indices of the test portion.
        feature_config: Feature settings (a ``FeatureConfig`` instance).
        train_config: Training settings (a ``TrainingConfig`` instance).
        eval_config: Evaluation settings (an ``EvaluationConfig`` instance).

    Returns:
        Intended to be three ``DataLoader`` objects followed by an object and
        an int (per the annotation); ``None`` until implemented.
    """
    # TODO: Implement data preparation pipeline
    return None
|
82
forecasting_model/evaluation.py
Normal file
82
forecasting_model/evaluation.py
Normal file
@ -0,0 +1,82 @@
|
||||
import numpy as np
|
||||
import torch
|
||||
from torch.utils.data import DataLoader
|
||||
from typing import Dict, Any, Optional
|
||||
from utils.config_model import EvaluationConfig
|
||||
|
||||
def calculate_mae(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Calculate Mean Absolute Error.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values; must have the same shape as ``y_true``.

    Returns:
        The mean of ``|y_true - y_pred|`` over all elements, as a Python float.

    Raises:
        ValueError: If the inputs are empty or their shapes differ.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Refuse to broadcast: (N, 1) against (N,) would silently produce an
    # (N, N) error matrix and a meaningless score.
    if actual.shape != predicted.shape:
        raise ValueError(
            f"Shape mismatch: y_true {actual.shape} vs y_pred {predicted.shape}"
        )
    if actual.size == 0:
        raise ValueError("Cannot compute MAE on empty arrays")

    return float(np.mean(np.abs(actual - predicted)))
|
||||
|
||||
def calculate_rmse(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Calculate Root Mean Squared Error.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values; must have the same shape as ``y_true``.

    Returns:
        The square root of the mean of ``(y_true - y_pred) ** 2`` over all
        elements, as a Python float.

    Raises:
        ValueError: If the inputs are empty or their shapes differ.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Refuse to broadcast: (N, 1) against (N,) would silently produce an
    # (N, N) error matrix and a meaningless score.
    if actual.shape != predicted.shape:
        raise ValueError(
            f"Shape mismatch: y_true {actual.shape} vs y_pred {predicted.shape}"
        )
    if actual.size == 0:
        raise ValueError("Cannot compute RMSE on empty arrays")

    return float(np.sqrt(np.mean(np.square(actual - predicted))))
|
||||
|
||||
def plot_predictions_vs_actual(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    title_suffix: str,
    filename: str,
    max_points: Optional[int] = None,
) -> None:
    """
    Draw a line plot comparing predictions with actual values.

    Placeholder: no plot is produced yet; the call is a no-op.

    Args:
        y_true: Actual values.
        y_pred: Predicted values.
        title_suffix: Text for the plot title.
        filename: Output file name for the plot.
        max_points: Optional limit on plotted points (unused for now).
    """
    # TODO: Implement prediction vs actual plot
    return None
|
||||
|
||||
def plot_scatter_predictions(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    title_suffix: str,
    filename: str,
) -> None:
    """
    Draw a scatter plot of predictions against actual values.

    Placeholder: no plot is produced yet; the call is a no-op.

    Args:
        y_true: Actual values.
        y_pred: Predicted values.
        title_suffix: Text for the plot title.
        filename: Output file name for the plot.
    """
    # TODO: Implement scatter plot
    return None
|
||||
|
||||
def plot_residuals_time(
    residuals: np.ndarray,
    title_suffix: str,
    filename: str,
    max_points: Optional[int] = None,
) -> None:
    """
    Plot residuals over time.

    Placeholder: no plot is produced yet; the call is a no-op.

    Args:
        residuals: Residual values to plot.
        title_suffix: Text for the plot title.
        filename: Output file name for the plot.
        max_points: Optional limit on plotted points (unused for now).
    """
    # TODO: Implement residuals time plot
    return None
|
||||
|
||||
def plot_residuals_distribution(
    residuals: np.ndarray,
    title_suffix: str,
    filename: str,
) -> None:
    """
    Plot a histogram/KDE of the residuals.

    Placeholder: no plot is produced yet; the call is a no-op.

    Args:
        residuals: Residual values to plot.
        title_suffix: Text for the plot title.
        filename: Output file name for the plot.
    """
    # TODO: Implement residuals distribution plot
    return None
|
||||
|
||||
def evaluate_fold(
    model: torch.nn.Module,
    test_loader: DataLoader,
    loss_fn: torch.nn.Module,
    device: torch.device,
    target_scaler: Any,
    eval_config: EvaluationConfig,
    fold_num: int,
) -> Dict[str, float]:
    """
    Evaluate a model on the test set of one fold and generate plots.

    Placeholder: nothing is evaluated yet, so this currently returns ``None``
    instead of a metrics dictionary.

    Args:
        model: Model to evaluate.
        test_loader: Loader over the test data.
        loss_fn: Loss module.
        device: Device to run on.
        target_scaler: Scaler object for the target (type not fixed here).
        eval_config: Evaluation settings (an ``EvaluationConfig`` instance).
        fold_num: Number identifying the fold.

    Returns:
        Intended to be a ``Dict[str, float]``; ``None`` until implemented.
    """
    # TODO: Implement full evaluation pipeline
    return None
|
5
forecasting_model/io/__init__.py
Normal file
5
forecasting_model/io/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""
|
||||
IO utilities for the forecasting model.
|
||||
|
||||
This package contains utilities for data loading, saving, and visualization.
|
||||
"""
|
75
forecasting_model/io/plotting.py
Normal file
75
forecasting_model/io/plotting.py
Normal file
@ -0,0 +1,75 @@
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import numpy as np
|
||||
from typing import Optional
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def setup_plot_style() -> None:
    """
    Configure a consistent plotting style.

    Placeholder: no style is applied yet; the call is a no-op.
    """
    # TODO: Implement plot style configuration
    return None
|
||||
|
||||
def save_plot(fig: plt.Figure, filename: str) -> None:
    """
    Save a figure to a file with proper error handling.

    Placeholder: nothing is written yet; the call is a no-op.

    Args:
        fig: Figure to save.
        filename: Destination file name.
    """
    # TODO: Implement plot saving with error handling
    return None
|
||||
|
||||
def create_time_series_plot(
    x: np.ndarray,
    y_true: np.ndarray,
    y_pred: np.ndarray,
    title: str,
    xlabel: str,
    ylabel: str,
    max_points: Optional[int] = None,
) -> plt.Figure:
    """
    Build a time series plot of actual versus predicted values.

    Placeholder: no figure is created yet, so this currently returns ``None``.

    Args:
        x: Values for the horizontal axis.
        y_true: Actual values.
        y_pred: Predicted values.
        title: Plot title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        max_points: Optional limit on plotted points (unused for now).

    Returns:
        Intended to be a ``plt.Figure``; ``None`` until implemented.
    """
    # TODO: Implement time series plot creation
    return None
|
||||
|
||||
def create_scatter_plot(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    title: str,
    xlabel: str,
    ylabel: str,
) -> plt.Figure:
    """
    Build a scatter plot of actual versus predicted values.

    Placeholder: no figure is created yet, so this currently returns ``None``.

    Args:
        y_true: Actual values.
        y_pred: Predicted values.
        title: Plot title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.

    Returns:
        Intended to be a ``plt.Figure``; ``None`` until implemented.
    """
    # TODO: Implement scatter plot creation
    return None
|
||||
|
||||
def create_residuals_plot(
    x: np.ndarray,
    residuals: np.ndarray,
    title: str,
    xlabel: str,
    ylabel: str,
    max_points: Optional[int] = None,
) -> plt.Figure:
    """
    Build a plot of residuals over time.

    Placeholder: no figure is created yet, so this currently returns ``None``.

    Args:
        x: Values for the horizontal axis.
        residuals: Residual values to plot.
        title: Plot title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        max_points: Optional limit on plotted points (unused for now).

    Returns:
        Intended to be a ``plt.Figure``; ``None`` until implemented.
    """
    # TODO: Implement residuals plot creation
    return None
|
||||
|
||||
def create_residuals_distribution_plot(
    residuals: np.ndarray,
    title: str,
    xlabel: str,
    ylabel: str,
) -> plt.Figure:
    """
    Build a distribution plot of the residuals.

    Placeholder: no figure is created yet, so this currently returns ``None``.

    Args:
        residuals: Residual values to plot.
        title: Plot title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.

    Returns:
        Intended to be a ``plt.Figure``; ``None`` until implemented.
    """
    # TODO: Implement residuals distribution plot creation
    return None
|
28
forecasting_model/model.py
Normal file
28
forecasting_model/model.py
Normal file
@ -0,0 +1,28 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from typing import Optional
|
||||
from utils.config_model import ModelConfig
|
||||
|
||||
class LSTMForecastModel(nn.Module):
    """
    Skeleton of the LSTM forecasting network.

    Only the configuration is stored so far; no layers are created and
    ``forward`` returns ``None``.
    """

    def __init__(self, model_config: ModelConfig):
        """
        Keep the model configuration on the instance.

        Args:
            model_config: Model settings (a ``ModelConfig`` instance).
        """
        super().__init__()
        # Keep the flag as its own attribute next to the full config object.
        self.use_residual_skips = model_config.use_residual_skips
        self.config = model_config

        # TODO: Initialize LSTM layers
        # TODO: Initialize dropout
        # TODO: Initialize output layer
        # TODO: Initialize residual connection layer if needed

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Run the forward pass of the LSTM network.

        Placeholder: no computation happens yet; ``None`` is returned.

        Args:
            x: Input tensor of shape (batch_size, sequence_length, input_size)

        Returns:
            Intended to be a predictions tensor of shape
            (batch_size, forecast_horizon); ``None`` until implemented.
        """
        # TODO: Implement forward pass with optional residual connections
        return None
|
50
forecasting_model/trainer.py
Normal file
50
forecasting_model/trainer.py
Normal file
@ -0,0 +1,50 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.utils.data import DataLoader
|
||||
from typing import Optional, Dict, Any
|
||||
from ..utils.config_model import TrainingConfig
|
||||
|
||||
class Trainer:
    """
    Skeleton of the training driver.

    The constructor only stores its arguments; the optimizer, early stopping
    and all loops are still to be written, so ``train_epoch``, ``evaluate``
    and ``train`` return ``None``.
    """

    def __init__(
        self,
        model: nn.Module,
        train_loader: DataLoader,
        val_loader: DataLoader,
        loss_fn: nn.Module,
        device: torch.device,
        config: TrainingConfig,
        scheduler: Optional[torch.optim.lr_scheduler._LRScheduler] = None,
        target_scaler: Optional[Any] = None,
    ):
        """
        Store the training collaborators on the instance.

        Args:
            model: Model to train.
            train_loader: Loader over the training data.
            val_loader: Loader over the validation data.
            loss_fn: Loss module.
            device: Device to run on.
            config: Training settings (a ``TrainingConfig`` instance).
            scheduler: Optional learning-rate scheduler.
            target_scaler: Optional scaler object for the target.
        """
        # Model and loss.
        self.model = model
        self.loss_fn = loss_fn

        # Data sources.
        self.train_loader = train_loader
        self.val_loader = val_loader

        # Run-time settings and optional helpers.
        self.device = device
        self.config = config
        self.scheduler = scheduler
        self.target_scaler = target_scaler

        # TODO: Initialize optimizer (Adam)
        # TODO: Initialize early stopping if configured

    def train_epoch(self) -> Dict[str, float]:
        """
        Train for one epoch.

        Placeholder: returns ``None`` until implemented.
        """
        # TODO: Implement training loop for one epoch
        return None

    def evaluate(self, loader: DataLoader) -> Dict[str, float]:
        """
        Evaluate the model on the given data loader.

        Placeholder: returns ``None`` until implemented.

        Args:
            loader: Loader over the data to evaluate on.
        """
        # TODO: Implement evaluation with metrics on original scale
        return None

    def train(self) -> Dict[str, Any]:
        """
        Run the main training loop with validation and early stopping.

        Placeholder: returns ``None`` until implemented.
        """
        # TODO: Implement full training loop with validation
        return None
|
5
forecasting_model/utils/__init__.py
Normal file
5
forecasting_model/utils/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""
|
||||
Utility functions and classes for the forecasting model.
|
||||
|
||||
This package contains configuration models, helper functions, and other utilities.
|
||||
"""
|
62
forecasting_model/utils/config_model.py
Normal file
62
forecasting_model/utils/config_model.py
Normal file
@ -0,0 +1,62 @@
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional, List, Union
|
||||
from enum import Enum
|
||||
|
||||
class WaveletTransformConfig(BaseModel):
    """Settings for the optional wavelet transform (see ``FeatureConfig.wavelet_transform``)."""

    # Off by default.
    apply: bool = False
    # Defaults to "target"; the set of accepted values is not constrained here.
    target_or_feature: str = "target"
    # Wavelet name; presumably a PyWavelets identifier -- TODO confirm.
    wavelet_type: str = "db4"
    level: int = 3
    # Names of the coefficient sets to keep.
    use_coeffs: List[str] = ["approx", "detail_1"]
|
||||
|
||||
class DataConfig(BaseModel):
    """Settings describing the input data; all three fields are required."""

    # Location of the data file.
    data_path: str
    # Name of the datetime column.
    datetime_col: str
    # Name of the target column.
    target_col: str
|
||||
|
||||
class FeatureConfig(BaseModel):
    """Feature-engineering settings; only the last two fields are optional."""

    sequence_length: int
    forecast_horizon: int
    lags: List[int]
    rolling_window_sizes: List[int]
    use_time_features: bool
    # No scaling method is selected when left as None.
    scaling_method: Optional[str] = None
    # No wavelet settings are supplied when left as None.
    wavelet_transform: Optional[WaveletTransformConfig] = None
|
||||
|
||||
class ModelConfig(BaseModel):
    """Model settings consumed by ``LSTMForecastModel``."""

    # Not supplied by the user; meant to be calculated later.
    input_size: Optional[int] = None
    hidden_size: int
    num_layers: int
    dropout: float
    use_residual_skips: bool = False
    # Not supplied by the user; meant to be calculated later.
    output_size: Optional[int] = None
|
||||
|
||||
class TrainingConfig(BaseModel):
    """Training settings; the early-stopping and scheduler fields are optional."""

    batch_size: int
    epochs: int
    learning_rate: float
    # Name of the loss function; accepted values are not constrained here.
    loss_function: str
    # Device name as a string.
    device: str
    # Optional fields, all None by default.
    early_stopping_patience: Optional[int] = None
    scheduler_step_size: Optional[int] = None
    scheduler_gamma: Optional[float] = None
|
||||
|
||||
class CrossValidationConfig(BaseModel):
    """Cross-validation settings."""

    n_splits: int
    test_size_fraction: float
    val_size_fraction: float
    # May be an int or a float; presumably a row count vs. a fraction -- TODO confirm.
    initial_train_size: Optional[Union[int, float]] = None
|
||||
|
||||
class EvaluationConfig(BaseModel):
    """Evaluation settings; every field is required."""

    # Names of the metrics to compute.
    metrics: List[str]
    eval_batch_size: int
    save_plots: bool
    plot_sample_size: int
|
||||
|
||||
class MainConfig(BaseModel):
    """Top-level configuration that groups every section; all sections are required."""

    data: DataConfig
    features: FeatureConfig
    model: ModelConfig
    training: TrainingConfig
    cross_validation: CrossValidationConfig
    evaluation: EvaluationConfig
|
Reference in New Issue
Block a user