Skip to content

SQL.py

from pandas_gbq import read_gbq

def query_bigquery_dataset(
    project_id: str,
    dataset: str,
    table: str,
    fields: list = None,
    where_clause: str = "",
    rename_map: dict = None
) -> "pd.DataFrame":
    """
    General-purpose BigQuery query utility.

    Parameters:
        project_id (str): Your GCP project ID
        dataset (str): BigQuery dataset name or full dataset path including project ID (e.g., "bigquery-public-data.ml_datasets")
        table (str): Table name within the dataset
        fields (list, optional): List of fields to select (default: all)
        where_clause (str, optional): Optional SQL WHERE clause without 'WHERE'
        rename_map (dict, optional): Dictionary to rename columns after pull

    Returns:
        pd.DataFrame: A DataFrame containing the queried result
    """
    selected_fields = ", ".join(fields) if fields else "*"
    where_sql = f"WHERE {where_clause}" if where_clause else ""

    query = f"""
    SELECT {selected_fields}
    FROM `{dataset}.{table}`
    {where_sql}
    """
    df = read_gbq(query, project_id=project_id, use_bqstorage_api=False)

    if rename_map:
        df.rename(columns=rename_map, inplace=True)

    return df