Files
entrix_case_challange/data_analysis_run.py
2025-05-06 09:11:36 +02:00

74 lines
2.5 KiB
Python

import argparse
import logging
import sys
from pathlib import Path
import time
# Import necessary components from your project structure
from data_analysis.utils.data_config_model import load_settings
from data_analysis.analysis.pipeline import run_eda_pipeline
# --- Basic Logging Setup ---
# The root logger is configured as soon as this module loads, so messages
# emitted before the pipeline starts already use the format below.
logging.basicConfig(
    datefmt='%H:%M:%S',
    format='%(asctime)s - %(levelname)-7s - %(message)s',
    level=logging.INFO,
)
# Module-wide handle on the root logger.
logger = logging.getLogger()

# Keep chatty third-party libraries quiet: matplotlib is capped at WARNING
# (raise to ERROR if it is still too noisy).
mpl_logger = logging.getLogger('matplotlib')
mpl_logger.setLevel(logging.WARNING)
# --- Argument Parsing ---
def parse_arguments(argv=None):
    """Parse the command-line arguments of the EDA runner.

    Args:
        argv: Optional sequence of argument strings to parse instead of the
            process arguments. When ``None`` (the default) argparse reads
            ``sys.argv[1:]``, which matches the previous behaviour.

    Returns:
        argparse.Namespace: The parsed options. ``config`` holds the path to
        the YAML configuration file as a string (default ``'config.yaml'``).
    """
    parser = argparse.ArgumentParser(
        description="Run the Energy Forecasting EDA pipeline.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        '-c', '--config',
        type=str,
        default='config.yaml',  # Default config file name
        help="Path to the YAML configuration file.",
    )
    # Further command-line overrides (for example a switch that forces the
    # log level to DEBUG) can be registered here later.
    return parser.parse_args(argv)
# --- Main Execution ---
def main():
    """Run the EDA pipeline from the command line.

    Parses the command-line arguments, loads the settings from the given
    configuration path, runs the pipeline and logs the elapsed wall-clock
    time (measured from just before the settings are loaded).

    Raises:
        SystemExit: Re-raised with the same code when the pipeline exits on
            its own, and raised with code 1 when the pipeline raises any
            other exception.
    """
    args = parse_arguments()
    config_path = Path(args.config)
    start_time = time.perf_counter()

    # --- Configuration Loading ---
    # NOTE(review): the message below implies load_settings falls back to
    # defaults when the file cannot be loaded -- confirm against its
    # implementation; an exception raised here is not handled.
    settings = load_settings(config_path)
    logger.info(f"Using configuration from: {config_path.resolve()} (or defaults if loading failed)")

    # --- Pipeline Execution ---
    try:
        # Only the pipeline call is guarded; success reporting lives in `else`.
        run_eda_pipeline(settings)
    except SystemExit as e:
        # The pipeline runner exited intentionally: report and keep its code.
        logger.warning(f"Pipeline exited with code {e.code}.")
        sys.exit(e.code)
    except Exception as e:
        # Top-level boundary: log the full traceback, then signal failure.
        logger.critical(f"A critical error occurred during pipeline execution: {e}", exc_info=True)
        end_time = time.perf_counter()
        logger.info(f"Main script failed after {end_time - start_time:.2f} seconds.")
        sys.exit(1)
    else:
        end_time = time.perf_counter()
        logger.info(f"Main script finished successfully in {end_time - start_time:.2f} seconds.")
if __name__ == "__main__":
    # main() calls sys.exit() itself with a non-zero code on failure, so a
    # normal return means success and the interpreter exits with status 0.
    # (The previous unconditional exit(1) reported failure on every run.)
    main()