This commit is contained in:
2025-05-02 10:45:06 +02:00
commit 7c9d809a82
29 changed files with 2931 additions and 0 deletions

View File

View File

@ -0,0 +1,306 @@
% LaTeX EDA Report Template
\documentclass[11pt, a4paper]{article}
% --- Packages ---
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{lmodern} % Use Latin Modern fonts
\usepackage[margin=1in]{geometry} % Set page margins
\usepackage{graphicx} % Required for including images
% \graphicspath{{../reports/plots/}} % REMOVE OR COMMENT OUT THIS LINE
\usepackage{booktabs} % For professional quality tables (\toprule, \midrule, \bottomrule)
\usepackage{amsmath} % For math symbols and environments
\usepackage{datetime2} % For date formatting (optional, can use simple text)
\usepackage{float} % For finer control over figure placement (e.g., [H] option)
\usepackage{caption} % For customizing captions
\usepackage{hyperref} % For clickable links (optional)
\usepackage{sectsty} % To potentially adjust section font sizes/styles (optional)
\usepackage{parskip} % Use vertical space between paragraphs instead of indentation
\usepackage{ifthen} % Conditional logic (\ifthenelse) used to include optional figures
% --- Hyperref Setup (Optional) ---
\hypersetup{
colorlinks=true,
linkcolor=blue,
filecolor=magenta,
urlcolor=cyan,
pdftitle={Time Series EDA Report},
pdfpagemode=FullScreen,
}
% --- Custom LaTeX Definitions Placeholder ---
{{LATEX_DEFINITIONS}} % Python script will insert \newcommand definitions here
% Fallback for manual compilation: if the definitions inserted above did not define a flag macro, define it here so that it expands to `false`
\ifdefined\ifShowZoomedTimeseries\else\newcommand{\ifShowZoomedTimeseries}{false}\fi
\ifdefined\ifShowYearlyDecomp\else\newcommand{\ifShowYearlyDecomp}{false}\fi
% --- Document Information ---
\title{Time Series Exploratory Data Analysis Report: Hourly Prices}
\author{Generated Automatically}
\date{\reportDateGenerated} % Use the macro defined in Python
% --- Start Document ---
\begin{document}
\maketitle
% --- Overview Section ---
\section*{Report Overview}
\begin{itemize}
\item \textbf{Data Source:} \dataSourceDescription
\item \textbf{Time Series Variable:} \texttt{\priceVariableName}
\item \textbf{Time Index Frequency:} \timeIndexFrequency
\item \textbf{Date Range:} \dateRangeStart \ to \dateRangeEnd
\end{itemize}
% --- Section 1: Data Overview ---
\section{Data Overview and Initial Inspection}
Purpose: Understand the basic structure, size, and data types of the dataset. Check the time index integrity.
\subsection*{Key Information}
\begin{itemize}
\item Number of data points (length of the series): \numDataPoints
\item Confirmation of time index format and frequency: \timeIndexConfirmation
\item Presence of other columns/variables: \otherColumnsList
\end{itemize}
\subsection*{Raw Data Sample}
% Placeholder for Table: First 5 Rows
\tableHeadData
\vspace{\baselineskip} % Add some vertical space
% Placeholder for Table: Last 5 Rows
\tableTailData
\subsection*{Data Types}
% Placeholder for Table: Data Types (`df.info()`)
\tableDtypesInfo
% --- Section 2: Descriptive Statistics & Missing Values ---
\section{Descriptive Statistics and Missing Values}
Purpose: Summarize the central tendency, dispersion, and distribution of the price variable and identify data completeness issues. Note any unusual values (like negative prices).
\subsection*{Price Variable Statistics}
% Placeholder for Table: Descriptive Statistics (`df['Price'].describe()`)
\tableDescriptiveStats
\subsection*{Missing Values}
% Placeholder for Table: Count of Missing Values
\tableMissingCounts
\vspace{\baselineskip}
% Placeholder for Table: Percentage of Missing Values
\tableMissingPercentages
\vspace{\baselineskip}
Observations on missing values: \missingValuesObservations % Add a text placeholder
% --- Section 3: Visual Exploration ---
\section{Visual Exploration of Time Series Patterns}
Purpose: Visually identify overall trends, seasonality (daily, weekly, yearly), cycles, outliers, and changes in variance. Investigate interactions between patterns.
\begin{figure}[H] % Use [H] from float package to place figure 'here' if possible
\centering
% Placeholder for Plot: Full Time Series
\includegraphics[width=0.9\textwidth]{\plotFullTimeseries}
\caption{Full Time Series: Price vs. Time.}
\label{fig:full_ts}
\end{figure}
% --- Conditionally include Zoomed Timeseries Plot ---
\ifthenelse{\boolean{\ifShowZoomedTimeseries}}{%
\begin{figure}[H]
\centering
% Placeholder for Plot: Zoomed Time Series
\includegraphics[width=0.9\textwidth]{\plotZoomedTimeseries}
\caption{Zoomed Time Series (Specific Period).}
\label{fig:zoomed_ts}
\end{figure}
}{} % Empty 'else' part - include nothing if false
\begin{figure}[H]
\centering
% Placeholder for Plot: Histogram
\includegraphics[width=0.7\textwidth]{\plotHistogram}
\caption{Distribution of Price Values.}
\label{fig:histogram}
\end{figure}
\subsection*{Seasonal Patterns \& Interactions}
\begin{figure}[H]
\centering
% Placeholder for Plot: Box Plots by Hour
\includegraphics[width=0.9\textwidth]{\plotBoxplotHour}
\caption{Price Distribution by Hour of Day.}
\label{fig:boxplot_hour}
\end{figure}
\begin{figure}[H]
\centering
% Placeholder for Plot: Box Plots by Day of Week
\includegraphics[width=0.9\textwidth]{\plotBoxplotDayofweek}
\caption{Price Distribution by Day of Week.}
\label{fig:boxplot_dayofweek}
\end{figure}
\begin{figure}[H]
\centering
% Placeholder for Plot: Box Plots by Month
\includegraphics[width=0.9\textwidth]{\plotBoxplotMonth}
\caption{Price Distribution by Month.}
\label{fig:boxplot_month}
\end{figure}
\begin{figure}[H]
\centering
% Placeholder for Plot: Box Plots by Year
\includegraphics[width=0.9\textwidth]{\plotBoxplotYear}
\caption{Price Distribution by Year.}
\label{fig:boxplot_year}
\end{figure}
% Optional Seasonal Subseries Plots
\textit{Optional: Seasonal Sub-series plots below.}
\begin{figure}[H]
\centering
% Placeholder for Optional Plot: Seasonal Sub-series Daily
\includegraphics[width=0.9\textwidth]{\plotSeasonalSubseriesDaily}
\caption{Seasonal Sub-series Plot (Daily Pattern).}
\label{fig:subseries_daily}
\end{figure}
\begin{figure}[H]
\centering
% Placeholder for Optional Plot: Seasonal Sub-series Weekly
\includegraphics[width=0.9\textwidth]{\plotSeasonalSubseriesWeekly}
\caption{Seasonal Sub-series Plot (Weekly Pattern).}
\label{fig:subseries_weekly}
\end{figure}
Observations on seasonal interactions: \seasonalInteractionsObservations % Placeholder
% --- Section 4: Time Series Decomposition ---
\section{Time Series Decomposition}
Purpose: Separate the time series into its underlying components: Trend, Seasonality, and Residuals. Assess how well the decomposition captures the main patterns.
Method Used: \decompositionMethodDetails
\begin{figure}[H]
\centering
% Placeholder for Plot: Decomposition (Daily Period)
\includegraphics[width=0.9\textwidth]{\plotDecompositionDaily}
\caption{Time Series Decomposition (Daily Seasonality, Period=24).}
\label{fig:decomp_daily}
\end{figure}
\begin{figure}[H]
\centering
% Placeholder for Plot: Decomposition (Weekly Period)
\includegraphics[width=0.9\textwidth]{\plotDecompositionWeekly}
\caption{Time Series Decomposition (Weekly Seasonality, Period=168).}
\label{fig:decomp_weekly}
\end{figure}
% Optional Yearly Decomposition
\textit{Optional: Yearly decomposition plot below.}
% --- Conditionally include Yearly Decomposition Plot ---
\ifthenelse{\boolean{\ifShowYearlyDecomp}}{%
\begin{figure}[H]
\centering
% Placeholder for Plot: Decomposition (Yearly Period) - Optional
\includegraphics[width=0.9\textwidth]{\plotDecompositionYearly}
\caption{Time Series Decomposition (Yearly Seasonality, Period=8760).}
\label{fig:decomp_yearly}
\end{figure}
}{} % Empty 'else' part - include nothing if false
Observations on decomposition: \decompositionObservations % Placeholder
% --- Section 5: Stationarity Analysis ---
\section{Stationarity Analysis}
Purpose: Determine if the statistical properties (mean, variance, autocorrelation) are constant over time.
Methods: Visual inspection, Augmented Dickey-Fuller (ADF) Test, KPSS Test.
Series Tested: \stationaritySeriesTested
\subsection*{Visual Inspection (Residuals)}
Refer to the trend component in the decomposition plots (Figures \ref{fig:decomp_daily}, \ref{fig:decomp_weekly}).
\begin{figure}[H]
\centering
% Placeholder for Plot: Residuals
\includegraphics[width=0.9\textwidth]{\plotResiduals}
\caption{Residuals from Decomposition (used for stationarity tests).}
\label{fig:residuals}
\end{figure}
\subsection*{Statistical Test Results}
% Placeholder for Table: ADF Test Results
\tableAdfResults
\vspace{\baselineskip}
% Placeholder for Table: KPSS Test Results
\tableKpssResults
\subsection*{Findings}
\stationarityFindingsSummary % Placeholder
% --- Section 6: Autocorrelation Analysis ---
\section{Autocorrelation Analysis}
Purpose: Understand the linear dependence between the series (or tested series) and its past values.
Series Analyzed: \autocorrSeriesAnalyzed
Lags Shown: \autocorrLagsShown
\begin{figure}[H]
\centering
% Placeholder for Plot: ACF
\includegraphics[width=0.9\textwidth]{\plotAcf}
\caption{Autocorrelation Function (ACF).}
\label{fig:acf}
\end{figure}
\begin{figure}[H]
\centering
% Placeholder for Plot: PACF
\includegraphics[width=0.9\textwidth]{\plotPacf}
\caption{Partial Autocorrelation Function (PACF).}
\label{fig:pacf}
\end{figure}
Observations: \autocorrObservations % Placeholder
% --- Section 7: Summary and Implications ---
\section{Analysis Summary and Implications for Forecasting}
Purpose: Synthesize the findings and discuss their relevance for modeling.
\subsection*{Key Findings Summary}
\begin{itemize}
\item \textbf{Trend \& Cycles:} \summaryTrendCycles
\item \textbf{Seasonality:} \summarySeasonality
\item \textbf{Stationarity:} \summaryStationarity
\item \textbf{Autocorrelations:} \summaryAutocorrelations
\item \textbf{Outliers/Volatility:} \summaryOutliersVolatility
\end{itemize}
\subsection*{Implications for Day-Ahead Model}
\begin{itemize}
\item \textbf{Model Choice:} \implicationsModelChoice
\item \textbf{Feature Engineering:} \implicationsFeatureEngineering
\item \textbf{Preprocessing:} \implicationsPreprocessing
\item \textbf{Evaluation:} \implicationsEvaluation
\item \textbf{Probabilistic Forecasting:} \implicationsProbabilistic
\end{itemize}
% --- Section 8: Conclusion ---
\section{Conclusion}
Purpose: Briefly summarize the EDA process.
\conclusionStatement % Placeholder
% --- End Document ---
\end{document}

View File

@ -0,0 +1,166 @@
import logging
import yaml
from pathlib import Path
from pydantic import BaseModel, Field, ValidationError, field_validator # Use BaseModel for direct dict init
from typing import Optional # Use Optional for type hints
# --- Logger Setup ---
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# --- Configuration File Path ---
# Default path of the configuration file. It is a relative path, so it is
# resolved against the process's current working directory when opened.
CONFIG_YAML_PATH = Path("config.yaml")
# --- Settings Model ---
class Settings(BaseModel):
    """
    Application settings loaded from YAML configuration.

    This class defines the configuration structure for the forecasting model,
    including data paths, logging settings, and analysis parameters.

    Every field has a default, so ``Settings()`` with no arguments is valid.
    Field validators normalise ``log_level`` to upper case and
    ``expected_data_frequency`` to lower case, and check the zoom dates
    against the ``YYYY-MM-DD`` format.
    """

    # -- General Settings --
    debug: bool = Field(
        default=False,
        description="Enable debug mode for detailed logging and latex stderr output",
        examples=[True, False],
    )
    log_level: str = Field(
        default="INFO",
        description="Logging level for the application",
        examples=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
    )

    # -- IO Settings --
    data_file: Path = Field(
        default=Path("data/energy_prices.csv"),
        description="Path to the input data CSV file relative to project root",
        examples=["data/energy_prices.csv", "data/Day-ahead_Prices_60min.csv"],
    )
    latex_template_file: Optional[Path] = Field(
        default=Path("data_analysis/utils/_latex_report_template.tex"),
        description="Path to the LTX template file relative to project root",
        examples=["data_analysis/utils/_latex_report_template.tex", "data/byo_template.tex"],
    )
    output_dir: Path = Field(
        default=Path("output/reports"),
        description="Directory to save generated plots and report artifacts",
        examples=["output/reports", "analysis/results"],
    )

    # -- Zoom Settings (Plotting and Analysis) --
    zoom_start_date: Optional[str] = Field(
        default=None,
        description="Start date for zoomed-in analysis plots (YYYY-MM-DD format)",
        examples=["2023-01-01"],
    )
    zoom_end_date: Optional[str] = Field(
        default=None,
        description="End date for zoomed-in analysis plots (YYYY-MM-DD format)",
        examples=["2023-12-31"],
    )

    # -- Data Settings --
    expected_data_frequency: str = Field(
        default="h",
        description="Expected frequency of the time series data",
        examples=["h", "D", "M", "Y"],
    )

    @field_validator('log_level')
    @classmethod
    def validate_log_level(cls, v):
        """Validate that log_level is a standard logging level.

        The comparison is case-insensitive; the upper-cased value is stored.

        Raises:
            ValueError: If the value is not one of the standard level names.
        """
        valid_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
        normalized = v.upper()
        if normalized not in valid_levels:
            raise ValueError(f"log_level must be one of {valid_levels}")
        return normalized

    @field_validator('expected_data_frequency')
    @classmethod
    def validate_frequency(cls, v):
        """Validate that the frequency is one of the supported aliases.

        The comparison is case-insensitive and the lower-cased value is
        stored (so ``"D"`` is stored as ``"d"``).

        Raises:
            ValueError: If the value is not one of the supported aliases.
        """
        # NOTE(review): the stored value is lower-cased, but pandas frequency
        # aliases are case-sensitive in recent releases -- confirm consumers
        # handle "d"/"m"/"y" before passing this value to pandas.
        valid_freqs = ["h", "D", "M", "Y"]
        v_lower = v.lower()
        if v_lower not in {freq.lower() for freq in valid_freqs}:
            raise ValueError(f"expected_data_frequency must be one of {valid_freqs}")
        return v_lower

    @field_validator('zoom_start_date', 'zoom_end_date', mode='before')
    @classmethod
    def validate_date_format(cls, v):
        """Validate the ``YYYY-MM-DD`` date format if a value is provided.

        Runs before pydantic's own ``str`` validation so that an unquoted
        YAML date (which PyYAML parses into a ``datetime.date``) is accepted
        and stored as its ISO string instead of being rejected as a
        non-string.

        Raises:
            ValueError: If a string value does not match ``YYYY-MM-DD``.
        """
        from datetime import date, datetime

        if v is None:
            return v
        if isinstance(v, date):
            # Covers date and datetime. A datetime's ISO form carries a time
            # part and therefore fails the format check below, as intended.
            v = v.isoformat()
        if not isinstance(v, str):
            # Leave other types to pydantic's str validation, which rejects them.
            return v
        try:
            datetime.strptime(v, "%Y-%m-%d")
        except ValueError as exc:
            raise ValueError("Date must be in YYYY-MM-DD format") from exc
        return v

    @field_validator('latex_template_file')
    @classmethod
    def validate_latex_template_file(cls, latex_template_file):
        """Fall back to the field's default template path when given ``None``."""
        return latex_template_file or cls.model_fields['latex_template_file'].default

    @classmethod
    def from_yaml(cls, yaml_path: Path) -> 'Settings':
        """
        Load settings from a YAML file.

        An empty YAML document yields the default settings, matching how
        ``load_settings`` treats an empty file.

        Args:
            yaml_path: Path to the YAML configuration file

        Returns:
            Settings instance with values from the YAML file

        Raises:
            FileNotFoundError: If the YAML file doesn't exist
            yaml.YAMLError: If the YAML file is invalid
            ValidationError: If the YAML values don't match the schema
            TypeError: If the top-level YAML value is not a mapping
        """
        if not yaml_path.exists():
            raise FileNotFoundError(f"Configuration file not found: {yaml_path}")
        try:
            # YAML streams are Unicode; do not depend on the locale encoding.
            with open(yaml_path, 'r', encoding="utf-8") as f:
                config = yaml.safe_load(f)
            if config is None:
                # safe_load returns None for an empty document.
                config = {}
            if not isinstance(config, dict):
                raise TypeError(
                    f"Top-level YAML value in {yaml_path} must be a mapping, "
                    f"got {type(config).__name__}"
                )
            return cls(**config)
        except yaml.YAMLError as e:
            logger.error(f"Error parsing YAML file {yaml_path}: {e}")
            raise
        except Exception as e:
            # Log for context, then let the caller decide how to handle it.
            logger.error(f"Error loading settings from {yaml_path}: {e}")
            raise
# --- Loading Function ---
def load_settings(config_path: Path = CONFIG_YAML_PATH) -> Settings:
    """Load settings from a YAML file, falling back to defaults on failure.

    This function is best-effort: a missing, empty, unparsable or invalid
    configuration file is logged and results in ``Settings()`` with default
    values. On a successful load the root logger's level is set to the
    configured ``log_level``.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        The loaded settings, or default settings if loading failed.
    """
    logger.info(f"Attempting to load configuration from: {config_path.resolve()}")
    try:
        # YAML streams are Unicode; do not depend on the locale encoding.
        with open(config_path, 'r', encoding="utf-8") as f:
            config_data = yaml.safe_load(f)
        if not config_data:
            logger.warning(f"Configuration file {config_path} is empty. Using default settings.")
            return Settings()
        loaded = Settings(**config_data)
        logger.info("Configuration loaded successfully.")
        # Apply the configured level to the root logger.
        level = loaded.log_level.upper()
        logging.getLogger().setLevel(level)
        logger.info(f"Log level set to: {level}")
        logger.debug(loaded.model_dump_json(indent=2))
        return loaded
    except FileNotFoundError:
        logger.warning(f"Configuration file {config_path} not found. Using default settings.")
        return Settings()
    except yaml.YAMLError as e:
        logger.error(f"Error parsing YAML file {config_path}: {e}. Using default settings.")
        return Settings()
    except ValidationError as e:
        logger.error(f"Configuration validation error: {e}. Using default settings.")
        return Settings()
    except Exception as e:
        # Deliberate catch-all: configuration problems must not prevent startup.
        logger.error(f"An unexpected error occurred while loading settings: {e}. Using default settings.")
        return Settings()
# --- Global Settings Instance ---
# Load settings when the module is imported, from the default config path.
# Importing this module therefore reads the config file and, on a successful
# load, sets the root logger's level; load_settings() falls back to default
# Settings when the file is missing, empty or invalid.
settings = load_settings()

View File

@ -0,0 +1,11 @@
from typing import Optional, Dict, Any
from pydantic import BaseModel
class ReportData(BaseModel):
    """Container for all report-related data.

    Every field is optional and defaults to ``None``.
    """
    # NOTE(review): presumably descriptive statistics of the price series,
    # keyed by name -- confirm the schema against the producer.
    descriptive_stats: Optional[Dict[str, Any]] = None
    # NOTE(review): presumably the stationarity test outputs -- confirm the
    # key/value schema against the producer.
    stationarity_tests: Optional[Dict[str, Any]] = None
    # NOTE(review): presumably free-form summary values for the report --
    # confirm against the consumer.
    summary_data: Optional[Dict[str, Any]] = None
    # Optional message about imputation; None when no message was recorded.
    imputation_message: Optional[str] = None