# utils.py

import yaml
import os
import logging

def validate_config(config):
    """
    Validates if the necessary keys are present in the configuration.

    Args:
        config (dict): Configuration dictionary.

    Raises:
        RuntimeError: If ``config`` is not a mapping, or if any required
            keys are missing.
    """
    from collections.abc import Mapping

    # An empty YAML document parses to None and a scalar document parses to
    # str/int/...; a bare ``in`` test would raise TypeError on None and do
    # substring matching on a str, so reject non-mappings up front.
    if not isinstance(config, Mapping):
        raise RuntimeError(
            f"Config must be a mapping, got {type(config).__name__}"
        )

    required_keys = ("schema", "transformations", "missing_threshold")
    for key in required_keys:
        if key not in config:
            raise RuntimeError(f"Missing required config key: {key}")

def load_config(path="pipeline_config.yml"):
    """
    Load YAML configuration for the data cleaning pipeline.

    The file is parsed with ``yaml.safe_load`` and the result is passed to
    ``validate_config`` before being returned.

    Args:
        path (str): Path to the YAML config file. Relative paths are
            resolved against the current working directory.

    Returns:
        dict: Parsed configuration dictionary.

    Raises:
        RuntimeError: If the file cannot be found, read, parsed, or fails
            validation. The underlying exception is attached as
            ``__cause__``.
    """
    abs_path = os.path.abspath(path)
    try:
        # Explicit UTF-8 so parsing does not depend on the platform's
        # default locale encoding.
        with open(abs_path, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)
        # Validate after the file handle has been released; any failure here
        # is still reported through the generic handler below.
        validate_config(config)
    except FileNotFoundError as e:
        message = f"Config file not found: {abs_path}"
        logging.error(message)
        raise RuntimeError(message) from e
    except yaml.YAMLError as e:
        message = f"Error parsing YAML file {abs_path}: {e}"
        logging.error(message)
        raise RuntimeError(message) from e
    except Exception as e:
        # Boundary handler: permission errors, decode errors and validation
        # failures all surface to callers as RuntimeError.
        message = f"Failed to load config from {abs_path}: {e}"
        logging.error(message)
        raise RuntimeError(message) from e

    logging.info(f"Successfully loaded config from {abs_path}")
    return config