import logging
from pathlib import Path
import pandas as pd
import json
from typing import Optional, Dict, List, Any

# Project settings are loaded from the utils config module
from data_analysis.utils.data_config_model import settings
import datetime

logger = logging.getLogger(__name__)

# --- Import data handling functions from io ---
from data_analysis.io.data_handling import (
    load_and_prepare_data,
    get_data_summary,
    get_descriptive_stats,
    PRICE_COL,      # Standardized price column name
    PRICE_COL_RAW,  # Raw price column name (needed for the check below)
)

# --- Import analysis functions from analysis ---
from .data import (
    perform_decomposition,
    perform_stationarity_tests,
)

# --- Import plotting functions ---
from data_analysis.io.plotting import (
    plot_full_time_series,
    plot_zoomed_time_series,
    plot_boxplot_by_period,
    plot_histogram,
    plot_decomposition as plot_decomposition_results,  # Rename to avoid clash
    plot_residuals,
    plot_acf_pacf,
    plot_seasonal_subseries,
    plot_cross_correlation,
    plot_weekly_autocorrelation,
)

# --- Import report generator ---
from ..io.report import generate_latex_report
from data_analysis.utils.report_model import ReportData


# --- Pipeline Function ---
def run_eda_pipeline():
    """
    Orchestrates the Exploratory Data Analysis process using loaded settings
    and generates a LaTeX report.
    """
    logger.info("Starting Exploratory Data Analysis Pipeline (LaTeX Report)...")

    output_dir = settings.output_dir
    plots_dir = output_dir / "plots"  # Plots subdirectory

    # Ensure output directories exist
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
        plots_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Output directory set to: {output_dir.resolve()}")
        logger.info(f"Plots directory set to: {plots_dir.resolve()}")
    except Exception as e:
        logger.error(f"Failed to create output directories: {e}", exc_info=True)
        raise SystemExit(1) from e

    # --- Data Holders ---
    df: Optional[pd.DataFrame] = None
    summary_data_dict: Optional[dict] = None
    desc_stats_price: Optional[pd.Series] = None
    residuals_daily: Optional[pd.Series] = None
    residuals_weekly: Optional[pd.Series] = None
    stationarity_results_dict: Optional[dict] = None
    series_name_stat_tested: Optional[str] = None

    # --- Plot Path Collectors ---
    other_plot_paths: Dict[str, str] = {}
    acf_pacf_plot_paths: Dict[str, str] = {}
    decomposition_plot_paths: Dict[str, str] = {}

    # --- Pipeline Steps ---

    # 1. Load Data
    logger.info("--- Step 1: Load Data ---")
    # Read the raw file first so missing price values can be counted before preparation
    df_raw = pd.read_csv(settings.data_file, header=0)
    initial_missing_price = 0
    if PRICE_COL_RAW in df_raw.columns:
        # Count missing values in the raw numeric column before full processing
        initial_missing_price = pd.to_numeric(df_raw[PRICE_COL_RAW], errors='coerce').isnull().sum()
    else:
        logger.warning(f"Raw price column '{PRICE_COL_RAW}' not found for initial missing value check.")

    df, err = load_and_prepare_data(settings.data_file)
    if err or df is None:
        logger.error(f"Data loading failed: {err or 'Unknown error'}. Stopping pipeline.")
        raise SystemExit(1)
    logger.info(f"Data loaded successfully. Shape: {df.shape}")
    logger.info(f"Columns: {', '.join(df.columns)}")

    # Construct the imputation message based on the initial check and the final state
    imputation_msg = "No missing price values detected."
    final_missing_price = df[PRICE_COL].isnull().sum()  # Should be 0 after load_and_prepare
    if initial_missing_price > 0:
        if final_missing_price == 0:
            imputation_msg = (
                f"{initial_missing_price} missing price value(s) were detected "
                f"and imputed (ffill/bfill)."
            )
        else:
            imputation_msg = (
                f"{initial_missing_price} missing price value(s) were detected; "
                f"imputation may be incomplete ({final_missing_price} remain)."
            )
    elif df.isnull().sum().sum() > final_missing_price:
        # Other columns contain NaNs even though the price column is complete
        imputation_msg = (
            "Missing values detected in non-price columns. "
            "Price column had no missing values."
        )

    # 2. Initial Inspection & Summary Stats
    logger.info("--- Step 2: Initial Inspection & Summary ---")
    summary_data_dict, err = get_data_summary(df)
    summary_file_path = output_dir / "summary_data.txt"
    if err:
        logger.error(f"Failed to get data summary: {err}")
    elif summary_data_dict:
        logger.info(f"Saving data summary to {summary_file_path}")
        try:
            with open(summary_file_path, 'w') as f:
                f.write("--- Data Summary ---\n\n")
                f.write(f"Data Source: {settings.data_file.name}\n")
                f.write(f"Date Range: {df.index.min()} to {df.index.max()}\n")
                f.write(f"Number of Points: {len(df)}\n\n")
                f.write("First 5 Rows:\n")
                f.write(summary_data_dict['head'].to_string())
                f.write("\n\nLast 5 Rows:\n")
                f.write(summary_data_dict['tail'].to_string())
                f.write("\n\nData Types:\n")
                f.write(summary_data_dict['dtypes'].to_string())
                f.write("\n\nMissing Value Counts (Post Initial Handling):\n")
                f.write(summary_data_dict['missing'].to_string())
                f.write("\n")
        except IOError as e:
            logger.error(f"Failed to write data summary to {summary_file_path}: {e}")

        # Log summaries as well
        logger.info(f"Head:\n{summary_data_dict['head'].to_string()}")
        logger.info(f"Tail:\n{summary_data_dict['tail'].to_string()}")
        logger.info(f"Data Types:\n{summary_data_dict['dtypes']}")
        # Keep for later if needed:
        # logger.info(f"Missing Values (Post Initial Handling):\n{summary_data_dict['missing']}")

    # Descriptive Stats
    desc_stats_price, err = get_descriptive_stats(df, price_col=PRICE_COL)
    desc_stats_file_path = output_dir / "descriptive_stats_price.csv"
    if err:
        logger.error(f"Failed to get descriptive stats for {PRICE_COL}: {err}")
    elif desc_stats_price is not None:
        logger.info(f"Saving price descriptive stats to {desc_stats_file_path}")
        try:
            if isinstance(desc_stats_price, pd.Series):
                # Series: keep the header row in the CSV
                desc_stats_price.to_csv(desc_stats_file_path, header=True)
            else:
                # DataFrame (unlikely for a single column, but safe)
                desc_stats_price.to_csv(desc_stats_file_path)
        except IOError as e:
            logger.error(f"Failed to write price descriptive stats to {desc_stats_file_path}: {e}")
        logger.info(f"Price Descriptive Stats:\n{desc_stats_price.to_string()}")
    # 3. Visualizations (Main Price Series)
    logger.info("--- Step 3: Visualizations (Price) ---")
    plot_name = "01_full_timeseries.png"
    err = plot_full_time_series(df, PRICE_COL, plots_dir / plot_name)
    if not err:
        other_plot_paths['full_timeseries'] = plot_name
    else:
        logger.warning(f"Plotting error (full series): {err}")

    if settings.zoom_start_date and settings.zoom_end_date:
        plot_name = "02_zoomed_timeseries.png"
        err = plot_zoomed_time_series(df, PRICE_COL, settings.zoom_start_date,
                                      settings.zoom_end_date, plots_dir / plot_name)
        if not err:
            other_plot_paths['zoomed_timeseries'] = plot_name
        else:
            logger.warning(f"Plotting error (zoomed series): {err}")

    for period in ['hour', 'dayofweek', 'month', 'year']:
        plot_name = f"03_boxplot_{period}.png"
        err = plot_boxplot_by_period(df, PRICE_COL, period, plots_dir / plot_name)
        if not err:
            other_plot_paths[f'boxplot_{period}'] = plot_name
        else:
            logger.warning(f"Plotting error (boxplot {period}): {err}")

    plot_name = "04_histogram_price.png"
    err = plot_histogram(df, PRICE_COL, plots_dir / plot_name)
    if not err:
        other_plot_paths['histogram_price'] = plot_name
    else:
        logger.warning(f"Plotting error (histogram): {err}")

    # Optional: Seasonal Subseries Plots
    plot_name = "04a_seasonal_subseries_daily.png"
    err = plot_seasonal_subseries(df, PRICE_COL, period=24, period_name="Daily",
                                  output_path=plots_dir / plot_name)
    if not err:
        other_plot_paths['seasonal_subseries_daily'] = plot_name
    else:
        logger.warning(f"Plotting error (subseries daily): {err}")

    if len(df) > 168:  # Enough data for a weekly subseries plot
        plot_name = "04b_seasonal_subseries_weekly.png"
        err = plot_seasonal_subseries(df, PRICE_COL, period=168, period_name="Weekly",
                                      output_path=plots_dir / plot_name)
        if not err:
            other_plot_paths['seasonal_subseries_weekly'] = plot_name
        else:
            logger.warning(f"Plotting error (subseries weekly): {err}")
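    # Note: the 24- and 168-observation periods used here (and in the decomposition below)
    # assume an hourly DatetimeIndex, i.e. 24 points per day and 168 per week. If the data
    # frequency ever changes, these hardcoded periods need to be revisited.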
    # 4. Decomposition
    logger.info("--- Step 4: Decomposition ---")
    residuals_for_analysis: Optional[pd.Series] = None  # Track which residuals to use later

    # Daily
    decomp_daily, err = perform_decomposition(df[PRICE_COL], model='additive', period=24)
    if err:
        logger.error(f"Daily decomposition failed: {err}")
    elif decomp_daily:
        plot_name = "05_decomposition_daily.png"
        err = plot_decomposition_results(decomp_daily, "Daily (Period=24)", plots_dir / plot_name)
        if not err:
            decomposition_plot_paths['daily'] = plot_name
        else:
            logger.warning(f"Plotting error (daily decomp): {err}")

        residuals_daily = decomp_daily.resid.dropna()
        plot_name = "06_residuals_daily.png"
        err = plot_residuals(residuals_daily, "Daily Decomp", plots_dir / plot_name)
        # Save the path regardless of error; the report might reference it
        other_plot_paths['residuals_daily'] = plot_name
        if err:
            logger.warning(f"Plotting error (daily residuals): {err}")
        if not residuals_daily.empty:
            residuals_for_analysis = residuals_daily  # Prefer daily initially

    # Weekly
    if len(df) >= 168 * 2:
        decomp_weekly, err = perform_decomposition(df[PRICE_COL], model='additive', period=168)
        if err:
            logger.error(f"Weekly decomposition failed: {err}")
        elif decomp_weekly:
            plot_name = "07_decomposition_weekly.png"
            err = plot_decomposition_results(decomp_weekly, "Weekly (Period=168)", plots_dir / plot_name)
            if not err:
                decomposition_plot_paths['weekly'] = plot_name
            else:
                logger.warning(f"Plotting error (weekly decomp): {err}")

            residuals_weekly = decomp_weekly.resid.dropna()
            plot_name = "08_residuals_weekly.png"
            err = plot_residuals(residuals_weekly, "Weekly Decomp", plots_dir / plot_name)
            other_plot_paths['residuals_weekly'] = plot_name
            if err:
                logger.warning(f"Plotting error (weekly residuals): {err}")
            if not residuals_weekly.empty:
                residuals_for_analysis = residuals_weekly  # Prefer weekly if available
    else:
        logger.warning("Skipping weekly decomposition, data length insufficient.")

    # Decide which residuals plot to link in the stationarity section.
    # Guard against the all-None case so the labels are only set when residuals actually exist.
    if residuals_for_analysis is not None and residuals_for_analysis is residuals_weekly:
        other_plot_paths['residuals'] = other_plot_paths.get('residuals_weekly', 'placeholder.png')
        series_name_stat_tested = "Weekly Residuals"
    elif residuals_for_analysis is not None and residuals_for_analysis is residuals_daily:
        other_plot_paths['residuals'] = other_plot_paths.get('residuals_daily', 'placeholder.png')
        series_name_stat_tested = "Daily Residuals"
    else:
        series_name_stat_tested = None  # No residuals available for tests
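    # Rationale: the stationarity tests below run on decomposition residuals rather than the
    # raw price so that the strong daily/weekly seasonal structure is removed first; the test
    # results then describe the remainder component that a downstream model would need to fit.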
    # 5. Stationarity Analysis
    logger.info("--- Step 5: Stationarity Analysis ---")
    stationarity_file_path = output_dir / "stationarity_tests.json"
    if series_name_stat_tested and residuals_for_analysis is not None and not residuals_for_analysis.empty:
        logger.info(f"Performing tests on: {series_name_stat_tested}")
        stationarity_results_dict, err = perform_stationarity_tests(residuals_for_analysis)
        if err:
            logger.error(f"Stationarity tests failed: {err}")
        elif stationarity_results_dict:
            logger.info(f"Saving stationarity test results to {stationarity_file_path}")
            try:
                # Convert numpy scalar types in the critical values to plain floats
                # so the results are JSON-serializable
                adf_res = stationarity_results_dict.get('adf', {})
                kpss_res = stationarity_results_dict.get('kpss', {})
                adf_crit = adf_res.get('Critical Values', {})
                kpss_crit = kpss_res.get('Critical Values', {})
                if isinstance(adf_crit, dict):
                    adf_res['Critical Values'] = {k: float(v) for k, v in adf_crit.items()}
                if isinstance(kpss_crit, dict):
                    kpss_res['Critical Values'] = {k: float(v) for k, v in kpss_crit.items()}
                results_to_save = {
                    "series_tested": series_name_stat_tested,
                    "adf": adf_res,
                    "kpss": kpss_res,
                }
                with open(stationarity_file_path, 'w') as f:
                    json.dump(results_to_save, f, indent=4)
            except (IOError, TypeError) as e:
                logger.error(f"Failed to write stationarity results to {stationarity_file_path}: {e}")

            # Log key results (p-values may come back as strings, e.g. capped KPSS values)
            logger.info(f"Stationarity Test Results ({series_name_stat_tested}):")
            if stationarity_results_dict.get('adf'):
                adf_p = stationarity_results_dict['adf'].get('p-value', 'N/A')
                if isinstance(adf_p, str):
                    logger.info(f"  ADF p-value: {adf_p}")
                else:
                    logger.info(f"  ADF p-value: {adf_p:.4f}")
            if stationarity_results_dict.get('kpss'):
                kpss_p = stationarity_results_dict['kpss'].get('p-value', 'N/A')
                if isinstance(kpss_p, str):
                    logger.info(f"  KPSS p-value: {kpss_p}")
                else:
                    logger.info(f"  KPSS p-value: {kpss_p:.4f}")
    else:
        logger.warning("Skipping Stationarity Analysis as no suitable residual series is available.")
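    # Interpretation note: ADF and KPSS have opposite null hypotheses. ADF's null is a unit
    # root (non-stationary), so a small p-value argues FOR stationarity; KPSS's null is
    # stationarity, so a small p-value argues AGAINST it. A low ADF p-value combined with a
    # high KPSS p-value is therefore the clearest indication of stationary residuals, while
    # mixed outcomes deserve a closer look. The string handling above exists because KPSS
    # p-values are only reported within a tabulated range, so the upstream helper may return
    # a capped, text-form value.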
    # 6. Autocorrelation Analysis
    logger.info("--- Step 6: Autocorrelation Analysis ---")
    # Import plot_acf / plot_pacf from statsmodels graphics directly for saving
    from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
    import matplotlib.pyplot as plt

    if series_name_stat_tested and residuals_for_analysis is not None and not residuals_for_analysis.empty:
        series_name_acf = series_name_stat_tested.lower().replace(' ', '_')
        base_name = f"09_{series_name_acf}"
        err_acf = None
        err_pacf = None
        try:
            # Create figure and axes explicitly
            fig_acf, ax_acf = plt.subplots()
            plot_acf(residuals_for_analysis, lags=48, ax=ax_acf,
                     title=f'ACF - {series_name_stat_tested}')
            plot_name_acf = f"{base_name}_acf.png"
            fig_acf.savefig(plots_dir / plot_name_acf)
            plt.close(fig_acf)  # Close figure after saving
            acf_pacf_plot_paths['acf'] = plot_name_acf
        except Exception as e:
            err_acf = e
        try:
            # Create figure and axes explicitly
            fig_pacf, ax_pacf = plt.subplots()
            plot_pacf(residuals_for_analysis, lags=48, ax=ax_pacf,
                      title=f'PACF - {series_name_stat_tested}', method='ywm')
            plot_name_pacf = f"{base_name}_pacf.png"
            fig_pacf.savefig(plots_dir / plot_name_pacf)
            plt.close(fig_pacf)  # Close figure after saving
            acf_pacf_plot_paths['pacf'] = plot_name_pacf
        except Exception as e:
            err_pacf = e

        if err_acf:
            logger.warning(f"Plotting error (ACF for {series_name_stat_tested}): {err_acf}")
        if err_pacf:
            logger.warning(f"Plotting error (PACF for {series_name_stat_tested}): {err_pacf}")

        # Weekly Autocorrelation Analysis
        try:
            plot_name = f"09c_weekly_autocorr_{series_name_acf}.png"
            err = plot_weekly_autocorrelation(
                series=residuals_for_analysis,
                series_name=series_name_stat_tested,
                output_path=plots_dir / plot_name,
                max_weeks=4
            )
            if not err:
                acf_pacf_plot_paths['weekly_autocorr'] = plot_name
            else:
                logger.warning(f"Plotting error (weekly autocorrelation): {err}")
        except Exception as e:
            logger.warning(f"Error in weekly autocorrelation analysis: {e}")
    else:
        logger.warning("Skipping Autocorrelation Analysis as no suitable series is available.")

    # 7. Exogenous Variable Analysis (if any exist)
    logger.info("--- Step 7: Exogenous Variable Analysis ---")
    logger.info("--- No exogenous variables available. Skipping. ---")

    # 8. Generate LaTeX Report
    logger.info("--- Step 8: Generate LaTeX Report ---")
    # Decomposition model and ACF/PACF lags are currently hardcoded in the steps above
    decomp_model_used = 'additive'
    acf_pacf_lags_used = 48

    # Create the ReportData object, including the imputation message
    report_data = ReportData(
        descriptive_stats={'desc_price': desc_stats_price} if desc_stats_price is not None else None,
        stationarity_tests=stationarity_results_dict,
        summary_data=summary_data_dict,    # Pass the summary dict directly
        imputation_message=imputation_msg  # Pass the generated message
    )
    try:
        generate_latex_report(
            output_dir=output_dir,
            df=df,
            report_data=report_data,
            series_name_stat=series_name_stat_tested,
            acf_pacf_plot_paths=acf_pacf_plot_paths,
            decomposition_plot_paths=decomposition_plot_paths,
            other_plot_paths=other_plot_paths,
            decomposition_model=decomp_model_used,  # Model used in Step 4
            acf_pacf_lags=acf_pacf_lags_used,       # Lags used in Step 6
            template_path=settings.latex_template_file
        )
    except (FileNotFoundError, IOError, ValueError, RuntimeError) as e:
        logger.error(f"Report generation failed: {e}", exc_info=True)
        # The pipeline currently continues so earlier outputs remain usable;
        # uncomment to abort instead:
        # raise SystemExit(1) from e
    logger.info(f"EDA Pipeline execution finished. Review logs and generated files in {output_dir}.")
    # The hint about compiling manually is emitted by generate_latex_report if compilation fails:
    # logger.info(f"Compile the report: cd '{output_dir.resolve()}' && pdflatex eda_report.tex")
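

# Minimal sketch of a direct entry point, in case this module is executed as a script.
# Illustrative only: it assumes `settings` is fully resolved at import time and that the
# project does not already provide a CLI wrapper; adjust the logging configuration to the
# project's conventions. Because of the relative imports above, invoke it as a module
# (python -m <package>.<module>) rather than by file path.
if __name__ == "__main__":
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    run_eda_pipeline()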