# ml/4_hyperopt_example.py

import pandas as pd
from hyperopt import hp
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

from astrodata.ml.metrics import SklearnMetric
from astrodata.ml.model_selection import HyperOptSelector
from astrodata.ml.models import SklearnModel

if __name__ == "__main__":
    # Example: hyperparameter search over two LinearSVC configurations with
    # HyperOptSelector, reporting the best parameters, metrics and model.

    # Load the iris dataset (a multiclass problem) into pandas containers.
    data = load_iris()
    X = pd.DataFrame(data.data, columns=data.feature_names)
    y = pd.Series(data.target)

    # Hold out 25% of the samples; the fixed seed keeps the split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=42
    )

    # Wrap two LinearSVC configurations that differ only in the penalty.
    model1 = SklearnModel(model_class=LinearSVC, penalty="l2", loss="squared_hinge")
    # penalty="l1" with loss="squared_hinge" is only supported by LinearSVC in
    # the primal formulation. scikit-learn releases where `dual` defaults to
    # True raise ValueError for this combination, so request dual=False
    # explicitly instead of relying on the version-dependent default.
    model2 = SklearnModel(
        model_class=LinearSVC, penalty="l1", loss="squared_hinge", dual=False
    )

    # Metrics to evaluate; the target has more than two classes, so f1_score
    # needs an explicit averaging mode.
    accuracy = SklearnMetric(accuracy_score)
    f1 = SklearnMetric(f1_score, average="micro")

    metrics = [accuracy, f1]

    # Define the hyperopt search space: the candidate model itself is one of
    # the dimensions, alongside the LinearSVC hyperparameters.
    param_space = {
        "model": hp.choice("model", [model1, model2]),
        "C": hp.choice("C", [0.1, 1, 10]),
        "max_iter": hp.choice("max_iter", [1000, 2000]),
        "tol": hp.choice("tol", [1e-3, 1e-4]),
    }

    # Instantiate HyperOptSelector (using cross-validation in this example)
    hos = HyperOptSelector(
        param_space=param_space,
        scorer=accuracy,
        use_cv=True,
        cv=5,
        random_state=42,
        max_evals=100,  # You can increase this for a more thorough search
        metrics=metrics,
    )

    print(hos)

    # Run the search on the training split; the held-out split is passed
    # along as X_test / y_test.
    hos.fit(X_train, y_train, X_test=X_test, y_test=y_test)

    print("Best parameters found:", hos.get_best_params())
    print("Best metrics:", hos.get_best_metrics())
    print("Best model params:", hos.get_best_model().get_params())