# File-viewer residue (not part of the module): 557 lines, 27 KiB, Python.
import datetime
|
|
import logging
|
|
import re
|
|
import subprocess
|
|
|
|
from pathlib import Path
|
|
from typing import Optional, Dict, Any
|
|
import shutil
|
|
|
|
import pandas as pd
|
|
|
|
from data_analysis.utils.data_config_model import settings # Assuming settings are configured
|
|
from data_analysis.utils.report_model import ReportData
|
|
|
|
# Module-level logger, named after this module via ``__name__``; used by every
# function below for progress, warning and error messages.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
# --- Helper function to format DataFrames/Series as LaTeX tables ---
|
|
CHARS = {
|
|
'&': r'\&',
|
|
'%': r'\%',
|
|
'$': r'\$',
|
|
'#': r'\#',
|
|
'_': r'\_',
|
|
'{': r'\{',
|
|
'}': r'\}',
|
|
'~': r'\textasciitilde{}',
|
|
'^': r'\^{}',
|
|
'\\': r'\textbackslash{}',
|
|
'<': r'\textless{}',
|
|
'>': r'\textgreater{}',
|
|
}
|
|
|
|
def _escape_latex(text: str) -> str:
|
|
"""Escapes special LaTeX characters in a string."""
|
|
# Convert input to string first to handle potential non-string types
|
|
t = str(text)
|
|
# Use a compiled regex for efficiency if called many times
|
|
# The pattern needs to be carefully ordered to handle overlapping keys (e.g., '\' vs '\\') correctly,
|
|
# although the current CHARS doesn't have overlaps. Sorting by length desc is safest.
|
|
pattern = re.compile('|'.join(re.escape(str(key)) for key in sorted(CHARS.keys(), key=lambda item: - len(item))))
|
|
t = pattern.sub(lambda match: CHARS[match.group()], t)
|
|
return t
|
|
|
|
|
|
def dataframe_to_latex(df: Optional[pd.DataFrame], title: Optional[str] = None, caption: Optional[str] = None, label: Optional[str] = None, escape: bool = True) -> Optional[str]:
    r"""Convert a pandas DataFrame to LaTeX via ``DataFrame.to_latex``.

    Args:
        df: Frame to render. ``None`` or an empty frame yields ``None``.
        title: Optional heading. Used as the caption when no ``caption`` is
            given, and additionally emitted as a bold line above the table
            whenever it differs from ``caption``.
        caption: Optional caption passed to ``to_latex``.
        label: Optional label; emitted as ``tab:<label>``.
        escape: When True, column names, the index name, string index labels,
            cell values, ``title`` and ``caption`` are escaped with
            ``_escape_latex``. When False, all of them are assumed to be valid
            LaTeX already. Cell values are converted with ``str()`` before
            escaping.

    Returns:
        The LaTeX source, ``None`` for missing/empty input, or an italic
        ``\textit{Error generating LaTeX table: ...}`` snippet when
        ``to_latex`` raises.
    """
    if df is None or df.empty:
        return None

    df_copy = df.copy()
    # The index is rendered when it carries information: it is named, or it is
    # anything other than the default RangeIndex.
    include_index = df_copy.index.name is not None or not isinstance(df_copy.index, pd.RangeIndex)

    if escape:
        df_copy.columns = [_escape_latex(str(col)) for col in df_copy.columns]

        if include_index:
            index = df_copy.index
            # Index labels are printed verbatim by to_latex(escape=False), so
            # string labels must be escaped here. Only string-like, flat
            # indexes are touched; numeric/datetime labels keep their native
            # formatting and MultiIndex labels are left unchanged.
            if not isinstance(index, pd.MultiIndex) and pd.api.types.is_string_dtype(index.dtype):
                df_copy.index = pd.Index(
                    [_escape_latex(v) if isinstance(v, str) else v for v in index],
                    dtype=index.dtype,
                    name=index.name,
                    tupleize_cols=False,
                )
            if df_copy.index.name:
                df_copy.index.name = _escape_latex(str(df_copy.index.name))

        def _escape_cell(cell):
            return _escape_latex(str(cell))

        # DataFrame.map only exists in pandas >= 2.1; older releases provide
        # the same element-wise operation as applymap.
        if hasattr(pd.DataFrame, "map"):
            df_copy = df_copy.map(_escape_cell)
        else:
            df_copy = df_copy.applymap(_escape_cell)

    # One left-aligned column per rendered column (plus the index, if shown).
    num_cols = len(df_copy.columns) + (1 if include_index else 0)
    col_format = "l" * num_cols

    try:
        # to_latex is called with escape=False because everything was escaped
        # above; title and caption therefore have to be escaped here as well.
        escaped_title = _escape_latex(str(title)) if title and escape else title
        escaped_caption = _escape_latex(str(caption)) if caption and escape else caption

        latex_str = df_copy.to_latex(
            index=include_index,
            escape=False,
            column_format=col_format,
            header=True,
            caption=escaped_caption if escaped_caption else escaped_title,
            label=f"tab:{label}" if label else None,
            position='!htbp',  # Placement suggestion
        )

        # Put the title in bold above the table when it is not already the caption text.
        if escaped_title and escaped_title != escaped_caption:
            latex_str = fr"\textbf{{{escaped_title}}}\par\par\medskip{latex_str}"

        return latex_str

    except Exception as e:
        logger.error(f"Failed to convert DataFrame to LaTeX: {e}", exc_info=True)
        # The message is escaped so that it can be embedded in the document safely.
        return fr"\textit{{Error generating LaTeX table: {_escape_latex(str(e))}}}"
|
|
|
|
def series_to_latex(series: Optional[pd.Series], title: Optional[str] = None, caption: Optional[str] = None, label: Optional[str] = None, escape: bool = True) -> str:
    r"""Render a pandas Series as a two-column LaTeX table.

    The Series index becomes the first column and its values the second. The
    column headers are the index name and the Series name, falling back to
    ``Index`` and ``Value`` when those names are missing or falsy.

    Args:
        series: Series to render.
        title: Forwarded to ``dataframe_to_latex``.
        caption: Forwarded to ``dataframe_to_latex``.
        label: Forwarded to ``dataframe_to_latex``.
        escape: Forwarded to ``dataframe_to_latex``.

    Returns:
        ``\textit{N/A}\par`` for a missing or empty Series, otherwise whatever
        ``dataframe_to_latex`` returns for the converted frame.
    """
    if series is None or series.empty:
        # Already valid LaTeX, so it can be dropped straight into the document.
        return r"\textit{N/A}\par"

    frame = series.reset_index()
    index_header = str(series.index.name or 'Index')
    value_header = str(series.name or 'Value')
    frame.columns = [index_header, value_header]

    return dataframe_to_latex(frame, title=title, caption=caption, label=label, escape=escape)
|
|
|
|
|
|
# --- Report Generation Function (LaTeX) ---
|
|
def compile_latex_report(report_tex_path: Path, output_dir: Path) -> bool:
    """Compile the LaTeX report with the locally installed ``pdflatex``.

    ``pdflatex`` is run twice, writing into ``output_dir / "_tmp"``. On
    success the PDF is moved to ``output_dir / "reports" / "report.pdf"`` and
    the ``_tmp`` directory is removed.

    Args:
        report_tex_path: Path to the .tex file.
        output_dir: Base directory containing (or receiving) ``reports`` and
            ``_tmp``.

    Returns:
        True if both runs succeeded and the PDF was moved, False otherwise
        (including when ``pdflatex`` is not installed).
    """
    logger.info(f"Attempting to compile LaTeX report: {report_tex_path}")

    reports_dir = output_dir / "reports"
    tmp_dir = output_dir / "_tmp"
    reports_dir.mkdir(parents=True, exist_ok=True)
    tmp_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): pdflatex runs in the current working directory, so relative
    # paths inside the .tex file are resolved against it rather than against
    # the .tex location -- confirm that this matches the template's expectations.
    command = ["pdflatex", "-interaction=nonstopmode", "-output-directory", str(tmp_dir), str(report_tex_path)]
    # In debug mode the compiler output goes straight to the console instead.
    capture = not settings.debug

    try:
        # Two passes so that cross-references and the table of contents resolve.
        for i in range(2):
            logger.info(f"Running pdflatex (attempt {i+1}/2)...")
            result = subprocess.run(
                command,
                capture_output=capture,
                text=True,
                # pdflatex output is not guaranteed to be valid UTF-8; without
                # this a decode error would be reported as a failed compilation.
                errors="replace",
            )

            if result.returncode != 0:
                logger.error(f"LaTeX compilation failed (attempt {i+1})")
                if result.stdout:
                    # Surface the end of the compiler output, where pdflatex
                    # reports the error, instead of discarding it.
                    tail = "\n".join(result.stdout.splitlines()[-30:])
                    logger.error("Last lines of pdflatex output:\n%s", tail)
                return False

        pdf_path = tmp_dir / f"{report_tex_path.stem}.pdf"
        if not pdf_path.exists():
            logger.error(f"Expected PDF file not found: {pdf_path}")
            return False

        target_pdf = reports_dir / "report.pdf"
        shutil.move(str(pdf_path), str(target_pdf))
        logger.info(f"Successfully compiled and moved report to: {target_pdf}")

        # Removes the whole _tmp directory, including the .tex source and aux files.
        shutil.rmtree(tmp_dir)
        logger.info("Cleaned up temporary LaTeX files")
        return True

    except FileNotFoundError:
        logger.error("pdflatex command not found. Please ensure LaTeX is installed and in your PATH.")
        return False
    except Exception as e:
        logger.error(f"Unexpected error during LaTeX compilation: {e}", exc_info=True)
        return False
|
|
|
|
def get_plot_path(key: str, plot_paths: Optional[Dict[str, str]]) -> str:
    """Return the ``reports/plots/`` path registered for ``key``.

    Args:
        key: Identifier of the plot to look up.
        plot_paths: Mapping from plot key to file name; may be ``None``.

    Returns:
        ``reports/plots/<filename>`` when the mapping holds a non-empty file
        name for ``key``; ``reports/plots/placeholder.png`` when the mapping is
        ``None``, the key is absent, or the stored file name is empty.
    """
    placeholder = "reports/plots/placeholder.png"
    filename = None if plot_paths is None else plot_paths.get(key)
    if not filename:
        return placeholder
    return f"reports/plots/{filename}"
|
|
|
|
def _format_latex_command(macro_name: str, value: str) -> str:
|
|
"""Formats a LaTeX \newcommand definition. Assumes value is correctly escaped/formatted."""
|
|
# Creates \newcommand{\macroName}{value}
|
|
# Using simple string concatenation to avoid f-string/raw-string issues.
|
|
return "\\newcommand{\\" + macro_name + "}{" + value + "}"
|
|
|
|
def _format_stationarity_results(results: Optional[Dict[str, Any]], test_name: str) -> str:
|
|
"""Formats stationarity test results dictionary into a LaTeX string."""
|
|
default_na = r"\textit{N/A}"
|
|
if not results:
|
|
return default_na
|
|
|
|
test_data = results.get(test_name.lower())
|
|
if not test_data:
|
|
return default_na
|
|
|
|
# Ensure keys and values are escaped correctly *before* creating the Series
|
|
formatted_data = {}
|
|
for key, value in test_data.items():
|
|
escaped_key = _escape_latex(str(key)) # Escape the key
|
|
if isinstance(value, dict): # Handle Critical Values
|
|
# Escape keys and format values within the string
|
|
cv_str = ", ".join([f"{_escape_latex(k)}: {v:.3f}" for k, v in value.items()])
|
|
formatted_data[escaped_key] = cv_str
|
|
elif isinstance(value, (int, float)):
|
|
# Apply specific formatting for p-value and test statistic
|
|
if 'p-value' in key.lower():
|
|
formatted_data[escaped_key] = f"{value:.4f}"
|
|
elif 'statistic' in key.lower():
|
|
formatted_data[escaped_key] = f"{value:.3f}"
|
|
else:
|
|
# Convert non-float numbers to string
|
|
formatted_data[escaped_key] = str(value)
|
|
else:
|
|
# Escape other string values
|
|
formatted_data[escaped_key] = _escape_latex(str(value))
|
|
|
|
if not formatted_data:
|
|
return default_na
|
|
|
|
series = pd.Series(formatted_data)
|
|
series.name = "Value" # This name doesn't get escaped by default in series_to_latex
|
|
series.index.name = "Metric" # This name doesn't get escaped by default in series_to_latex
|
|
|
|
# Use series_to_latex for table structure, disable its internal escaping
|
|
# as we have already escaped the content. Title also needs pre-escaping.
|
|
escaped_title = _escape_latex(f"{test_name.upper()} Test Results")
|
|
return series_to_latex(series, title=escaped_title, label=f"{test_name.lower()}_results", escape=False)
|
|
|
|
def generate_latex_report(
    output_dir: Path,
    df: Optional[pd.DataFrame],
    report_data: ReportData,
    series_name_stat: Optional[str],
    acf_pacf_plot_paths: Optional[Dict[str, str]] = None,
    decomposition_plot_paths: Optional[Dict[str, str]] = None,
    other_plot_paths: Optional[Dict[str, str]] = None,
    decomposition_model: str = 'additive',
    acf_pacf_lags: Optional[int] = 48,
    template_path: Path = Path("data_analysis/utils/_latex_report_template.tex")
):
    r"""Generate the LaTeX report (.tex file) from the template and try to compile it.

    One ``\newcommand`` is produced per report field (tables, plot paths,
    free-text observations). The resulting block replaces the
    ``{{LATEX_DEFINITIONS}}`` placeholder of the template, the result is
    written to ``output_dir / "_tmp" / "eda_report.tex"`` and handed to
    ``compile_latex_report``. A failed compilation is logged, not raised.

    Args:
        output_dir: Base directory; ``reports``, ``reports/plots`` and ``_tmp``
            are resolved beneath it.
        df: Analysed data; used for the date range, row count, inferred
            frequency and column list. May be ``None`` or empty.
        report_data: Source of ``summary_data``, ``descriptive_stats``,
            ``stationarity_tests`` and ``imputation_message``.
        series_name_stat: Name of the series used in the stationarity and
            autocorrelation sections.
        acf_pacf_plot_paths: Plot file names keyed by ``acf`` / ``pacf``.
        decomposition_plot_paths: Plot file names keyed by ``daily`` /
            ``weekly`` / ``yearly``.
        other_plot_paths: Plot file names for the remaining figures.
        decomposition_model: Model name quoted in the decomposition section.
        acf_pacf_lags: Number of lags quoted in the autocorrelation section.
        template_path: Location of the LaTeX template.

    Raises:
        FileNotFoundError: The template does not exist.
        IOError: The template cannot be read or the .tex file cannot be written.
        ValueError: The template lacks the ``{{LATEX_DEFINITIONS}}`` placeholder.
    """
    logger.info(f"Generating LaTeX EDA report using template: {template_path.resolve()}")

    reports_dir = output_dir / "reports"
    source_plots_dir = reports_dir / "plots"  # Where generated plots are looked up
    tmp_dir = output_dir / "_tmp"
    tmp_plots_dir = tmp_dir / "plots"  # Where the .tex file expects them
    reports_dir.mkdir(parents=True, exist_ok=True)
    tmp_dir.mkdir(parents=True, exist_ok=True)

    # Start from an empty plot directory so stale files never leak into a report.
    if tmp_plots_dir.exists():
        shutil.rmtree(tmp_plots_dir)
    tmp_plots_dir.mkdir()

    # Bulk copy is best-effort: the plots actually referenced are copied one by
    # one from reports/plots in add_path_def, so a missing output_dir/plots
    # directory must not abort report generation.
    bulk_plots_dir = output_dir / "plots"
    if bulk_plots_dir.is_dir():
        shutil.copytree(bulk_plots_dir, tmp_plots_dir, dirs_exist_ok=True)
    else:
        logger.debug("No bulk plot directory at %s; relying on per-plot copies.", bulk_plots_dir)

    report_tex_path = tmp_dir / "eda_report.tex"

    if not template_path.exists():
        logger.error(f"Report template not found at {template_path.resolve()}. Cannot generate report.")
        raise FileNotFoundError(f"Report template not found: {template_path.resolve()}")

    try:
        with open(template_path, 'r', encoding='utf-8') as f:
            template = f.read()
    except Exception as e:
        logger.error(f"Failed to read report template {template_path.resolve()}: {e}", exc_info=True)
        raise IOError(f"Failed to read report template {template_path.resolve()}: {e}") from e

    # --- Prepare LaTeX Definitions ---
    latex_definitions = []
    default_na = r"\textit{N/A}"
    default_text = r"\textit{Not provided - requires manual interpretation or more data.}\medskip"
    known_latex_defaults = (default_na, default_text)
    latex_formatters = (dataframe_to_latex, series_to_latex, _format_stationarity_results)

    def add_def(macro_name: str, value: Optional[Any], formatter=None, default=default_na, escape_if_plain: bool = True):
        """Append one macro definition to ``latex_definitions``.

        Args:
            macro_name: The name of the LaTeX macro (without backslash).
            value: The value for the macro; ``None`` selects ``default``.
            formatter: Optional callable turning ``value`` into text. If it
                returns ``None`` (nothing to render), ``default`` is used.
                When it returns LaTeX code, pass ``escape_if_plain=False``.
            default: Fallback text for missing values.
            escape_if_plain: Escape the final text unless it is known LaTeX
                (one of the module defaults or the output of a table formatter).
        """
        final_str = default
        is_known_latex = False
        use_default = value is None

        if value is not None:
            if formatter:
                formatted = formatter(value)
                if formatted is None:
                    # e.g. dataframe_to_latex on an empty frame: show the
                    # default instead of the literal text "None".
                    use_default = True
                else:
                    final_str = formatted
                    is_known_latex = formatter in latex_formatters
            else:
                final_str = str(value)

        if use_default and default in known_latex_defaults:
            is_known_latex = True

        # Formatters may return non-string objects (dates, numbers).
        final_str = str(final_str)

        if escape_if_plain and not is_known_latex:
            final_str = _escape_latex(final_str)

        latex_definitions.append(_format_latex_command(macro_name, final_str))

    def add_path_def(macro_name: str, path_dict: Optional[Dict[str, str]], key: str, default_filename='example-image-a'):
        """Define a plot-path macro and stage the plot file for compilation.

        A registered plot is referenced as ``plots/<filename>`` and copied from
        ``reports/plots`` into ``_tmp/plots``. Otherwise the macro points at
        ``default_filename`` without a directory prefix.

        Returns:
            True when a real plot was registered, False for the placeholder.
        """
        source_filename = path_dict.get(key) if path_dict else None
        filename = Path(source_filename).name if source_filename else ""
        is_placeholder = not filename

        if is_placeholder:
            # The placeholder is referenced by bare name, never under plots/.
            formatted_path = Path(default_filename).name
        else:
            formatted_path = f"plots/{filename}".replace('\\', '/')

        # A path is not prose: it must reach the template unescaped.
        add_def(macro_name, formatted_path, escape_if_plain=False)

        if not is_placeholder:
            source_file_path = source_plots_dir / filename
            target_file_path = tmp_plots_dir / filename
            if source_file_path.is_file():
                try:
                    shutil.copy2(source_file_path, target_file_path)
                except Exception as copy_e:
                    logger.warning(f"Could not copy plot file {source_file_path} to {target_file_path}: {copy_e}")
            elif not target_file_path.exists():
                # Not staged by the bulk copy either: the figure will be missing.
                logger.warning("Source plot file not found: %s", source_file_path)

        return not is_placeholder

    # --- Generate Definitions ---
    # Basic Info
    add_def("reportDateGenerated", datetime.date.today(), formatter=lambda d: d.strftime("%Y-%m-%d"))
    add_def("dataSourceDescription", f"Hourly prices from {settings.data_file.name}")
    add_def("priceVariableName", settings.data_file.stem)

    # Info from DataFrame
    if df is not None and not df.empty:
        def _as_date(bound):
            # Timestamps are reduced to their date; other index types (which
            # have no .date()) are reported as they are.
            return bound.date() if hasattr(bound, "date") else bound

        add_def("dateRangeStart", _as_date(df.index.min()))
        add_def("dateRangeEnd", _as_date(df.index.max()))
        add_def("numDataPoints", len(df))

        freq_info = "Irregular/Not Inferred"
        if isinstance(df.index, pd.DatetimeIndex):
            try:
                inferred = pd.infer_freq(df.index)
                freq_info = inferred if inferred else freq_info
            except Exception:  # infer_freq can fail on unsuitable indexes
                logger.warning("Could not infer frequency.", exc_info=True)
        add_def("timeIndexFrequency", f"Hourly (Inferred: {freq_info})")
        add_def("timeIndexConfirmation", f"DatetimeIndex, Hourly (Inferred: {freq_info})")

        # Column names are escaped one by one, so the joined text is already LaTeX.
        all_cols_str = ", ".join(_escape_latex(str(c)) for c in df.columns)
        add_def("otherColumnsList", all_cols_str if all_cols_str else "None", escape_if_plain=False)
    else:
        for macro in ("dateRangeStart", "dateRangeEnd", "numDataPoints",
                      "timeIndexFrequency", "timeIndexConfirmation"):
            add_def(macro, None, default=default_na)
        add_def("otherColumnsList", "None")

    # Section 1 Tables
    summary_data = report_data.summary_data or {}
    add_def("tableHeadData", summary_data.get('head'),
            formatter=lambda df_val: dataframe_to_latex(df_val, title="First 5 Rows", label="head", escape=True),
            escape_if_plain=False, default=default_na)
    add_def("tableTailData", summary_data.get('tail'),
            formatter=lambda df_val: dataframe_to_latex(df_val, title="Last 5 Rows", label="tail", escape=True),
            escape_if_plain=False, default=default_na)
    add_def("tableDtypesInfo", summary_data.get('dtypes'),
            formatter=lambda s: series_to_latex(s, title="Data Types", label="dtypes", escape=True),
            escape_if_plain=False, default=default_na)

    # Section 2 Tables
    desc_stats = report_data.descriptive_stats or {}
    # Passed unescaped: series_to_latex(escape=True) escapes the title itself,
    # and escaping it here as well would escape it twice.
    desc_title = f"Descriptive Statistics ({settings.data_file.stem})"
    add_def("tableDescriptiveStats", desc_stats.get('desc_price'),
            formatter=lambda s: series_to_latex(s, title=desc_title, label="desc_price", escape=True),
            escape_if_plain=False, default=default_na)

    missing_counts = summary_data.get('missing')
    add_def("tableMissingCounts", missing_counts,
            formatter=lambda s: series_to_latex(s, title="Missing Value Counts (Post-Imputation)", label="missing_counts", escape=True),
            escape_if_plain=False, default=default_na)

    missing_pct = None
    if missing_counts is not None and df is not None and len(df) > 0:
        missing_pct = ((missing_counts / len(df)) * 100).round(3)

    add_def("tableMissingPercentages", missing_pct,
            formatter=lambda s: series_to_latex(s, title="Missing Value Percentage (Post-Imputation)", label="missing_pct", escape=True),
            escape_if_plain=False, default=default_na)

    add_def("missingValuesObservations", report_data.imputation_message, default="Missing value check information not available.")

    # Section 3 Plots
    add_path_def("plotFullTimeseries", other_plot_paths, 'full_timeseries')
    # The return value tells the template whether a real plot is available.
    show_zoomed = add_path_def("plotZoomedTimeseries", other_plot_paths, 'zoomed_timeseries')
    add_def("ifShowZoomedTimeseries", "true" if show_zoomed else "false", escape_if_plain=False)

    add_path_def("plotHistogram", other_plot_paths, 'histogram_price')
    add_path_def("plotBoxplotHour", other_plot_paths, 'boxplot_hour')
    add_path_def("plotBoxplotDayofweek", other_plot_paths, 'boxplot_dayofweek')
    add_path_def("plotBoxplotMonth", other_plot_paths, 'boxplot_month')
    add_path_def("plotBoxplotYear", other_plot_paths, 'boxplot_year')
    add_path_def("plotSeasonalSubseriesDaily", other_plot_paths, 'seasonal_subseries_daily')
    add_path_def("plotSeasonalSubseriesWeekly", other_plot_paths, 'seasonal_subseries_weekly')
    add_def("seasonalInteractionsObservations", None, default=default_text, escape_if_plain=False)

    # Section 4 Decomposition
    add_def("decompositionMethodDetails", f"Statsmodels seasonal_decompose (model='{decomposition_model}')")
    add_path_def("plotDecompositionDaily", decomposition_plot_paths, 'daily')
    add_path_def("plotDecompositionWeekly", decomposition_plot_paths, 'weekly')
    show_yearly = add_path_def("plotDecompositionYearly", decomposition_plot_paths, 'yearly')
    add_def("ifShowYearlyDecomp", "true" if show_yearly else "false", escape_if_plain=False)

    add_def("decompositionObservations", None, default=default_text, escape_if_plain=False)

    # Section 5 Stationarity
    stationarity_tests = report_data.stationarity_tests or {}
    add_def("stationaritySeriesTested", series_name_stat)
    add_path_def("plotResiduals", other_plot_paths, 'residuals')

    add_def("tableAdfResults", stationarity_tests,
            formatter=lambda tests: _format_stationarity_results(tests, "ADF"),
            escape_if_plain=False, default=default_na)
    add_def("tableKpssResults", stationarity_tests,
            formatter=lambda tests: _format_stationarity_results(tests, "KPSS"),
            escape_if_plain=False, default=default_na)

    # The summary is either plain prose (escaped when emitted) or one of the
    # ready-made LaTeX fallbacks (emitted verbatim). Keeping the two apart
    # prevents the \textit{...} fallbacks from being escaped into literal text.
    plain_summary = None
    latex_summary = r"\textit{Analysis requires both ADF and KPSS results.}"
    try:
        adf_res = stationarity_tests.get('adf')
        kpss_res = stationarity_tests.get('kpss')
        adf_p = adf_res.get('p-value') if adf_res else None
        kpss_p = kpss_res.get('p-value') if kpss_res else None

        # ADF H0: unit root (non-stationary). KPSS H0: stationary.
        if adf_p is not None and kpss_p is not None:
            adf_rejects = adf_p < 0.05
            kpss_rejects = kpss_p < 0.05
            if adf_rejects and not kpss_rejects:
                plain_summary = "Tests suggest the series is stationary (ADF rejects H0, KPSS fails to reject H0)."
            elif not adf_rejects and kpss_rejects:
                # Both tests point to non-stationarity.
                plain_summary = "Tests suggest the series is non-stationary (unit root present) and requires differencing (ADF fails to reject H0, KPSS rejects H0)."
            elif adf_rejects and kpss_rejects:
                plain_summary = "Test results conflict: ADF suggests stationarity, KPSS suggests non-stationarity. May indicate difference-stationarity."
            else:
                # ADF cannot rule out a unit root while KPSS cannot rule out stationarity.
                plain_summary = "Test results conflict: ADF suggests non-stationarity, KPSS suggests stationarity. May indicate trend-stationarity (Both fail to reject H0)."
        elif adf_p is not None:
            plain_summary = f"ADF test p-value: {adf_p:.4f}. Stationarity conclusion requires KPSS test."
        elif kpss_p is not None:
            plain_summary = f"KPSS test p-value: {kpss_p:.4f}. Stationarity conclusion requires ADF test."
    except Exception as e:
        logger.warning(f"Could not generate stationarity summary: {e}")
        plain_summary = None
        latex_summary = r"\textit{Error generating summary.}"

    if plain_summary is not None:
        add_def("stationarityFindingsSummary", plain_summary)
    else:
        add_def("stationarityFindingsSummary", latex_summary, escape_if_plain=False)

    # Section 6 Autocorrelation
    add_def("autocorrSeriesAnalyzed", series_name_stat)
    add_def("autocorrLagsShown", acf_pacf_lags)
    add_path_def("plotAcf", acf_pacf_plot_paths, 'acf')
    add_path_def("plotPacf", acf_pacf_plot_paths, 'pacf')
    add_def("autocorrObservations", None, default=default_text, escape_if_plain=False)

    # Section 7 Summary & Implications, Section 8 Conclusion:
    # all are placeholders awaiting manual interpretation.
    for macro in (
        "summaryTrendCycles",
        "summarySeasonality",
        "summaryStationarity",
        "summaryAutocorrelations",
        "summaryOutliersVolatility",
        "implicationsModelChoice",
        "implicationsFeatureEngineering",
        "implicationsPreprocessing",
        "implicationsEvaluation",
        "implicationsProbabilistic",
        "conclusionStatement",
    ):
        add_def(macro, None, default=default_text, escape_if_plain=False)

    # --- Apply Definitions to Template ---
    definitions_block = "\n".join(latex_definitions)
    if "{{LATEX_DEFINITIONS}}" not in template:
        logger.error("Placeholder '{{LATEX_DEFINITIONS}}' not found in the LaTeX template preamble.")
        raise ValueError("Template missing '{{LATEX_DEFINITIONS}}' placeholder in preamble.")
    report_content = template.replace("{{LATEX_DEFINITIONS}}", definitions_block)

    # --- Write Report ---
    try:
        with open(report_tex_path, 'w', encoding='utf-8') as f:
            f.write(report_content)
    except Exception as e:
        logger.error(f"Failed to write LaTeX report to {report_tex_path}: {e}", exc_info=True)
        raise IOError(f"Failed to write LaTeX report to {report_tex_path}: {e}") from e
    logger.info(f"Successfully generated LaTeX report source: {report_tex_path}")

    # --- Compile ---
    # Kept outside the write handler so that a compilation problem is never
    # reported as a write failure. A failed compilation is only logged.
    if compile_latex_report(report_tex_path, output_dir):
        logger.info("LaTeX report successfully compiled to PDF")
    else:
        logger.warning("LaTeX compilation failed. Check logs above. The .tex file is available for manual compilation.")
|