"""Time an IncrementalSearchCV hyperparameter search over an SGDClassifier.

Builds a chunked synthetic classification dataset with dask-ml, searches
over ``alpha``, ``l1_ratio`` and ``average`` for an elastic-net
``SGDClassifier``, then prints the best parameters, best score, best
estimator and the elapsed time of the ``fit`` call.
"""
import time

import numpy as np
from dask_ml.datasets import make_classification
from dask_ml.model_selection import IncrementalSearchCV
from sklearn.linear_model import SGDClassifier

# NOTE(review): the dask-ml documentation states that IncrementalSearchCV
# needs a running dask.distributed Client. None is created in this script --
# confirm one is provided by the environment before running.

# Synthetic dataset: 10M samples x 20 features, created with chunks=100000.
X, y = make_classification(
    n_samples=10_000_000,
    n_features=20,
    chunks=100_000,
    random_state=0,
)

# Create the model.
model = SGDClassifier(tol=1e-3, penalty='elasticnet', random_state=0)

# Parameters we want to search through.
params = {
    'alpha': np.logspace(-2, 1, num=1000),
    'l1_ratio': np.linspace(0, 1, num=1000),
    'average': [True, False],
}

# Perform the search.
# NOTE(review): decay_rate=None is passed explicitly; presumably this opts out
# of the deprecated decay-based behaviour -- confirm against the installed
# dask-ml version.
search = IncrementalSearchCV(model, params, random_state=0, decay_rate=None)

# perf_counter() is monotonic, so the measured interval cannot be distorted by
# system clock adjustments the way time.time() can.
start_time = time.perf_counter()
# The class labels are passed explicitly and forwarded to the fit call.
search.fit(X, y, classes=[0, 1])
end_time = time.perf_counter()

# Calculate the elapsed time (seconds spent inside search.fit).
elapsed_time = end_time - start_time

# Print the best parameters, best score, and the time taken.
print("Best parameters found: ", search.best_params_)
print("Best score: ", search.best_score_)
print("Best estimator: ", search.best_estimator_)
print(f"Time taken: {elapsed_time:.2f} seconds")