machine-learing-methods/Lab2/7.1/example.py
2023-12-06 00:00:07 +02:00

115 lines
3.2 KiB
Python

import h2o
from h2o.estimators.deeplearning import H2ODeepLearningEstimator
from h2o.grid.grid_search import H2OGridSearch
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
# Bring up the H2O cluster: nthreads=-1 requests all available cores and the
# cluster memory is capped at 2 GB.
h2o.init(nthreads=-1, max_mem_size="2G")

# Read the churn dataset into a pandas DataFrame.
data = pd.read_csv("customer_churn.csv")

# Tenure values split by churn outcome; they feed the density plot below.
churn_column = data['Churn']
tenure_churned = data.loc[churn_column == 'Yes', 'tenure']
tenure_stayed = data.loc[churn_column == 'No', 'tenure']

# The exploratory density plot is switched off; set the flag to True to draw it.
SHOW_TENURE_KDE = False
if SHOW_TENURE_KDE:
    plt.figure()
    sns.kdeplot(
        tenure_churned, color='firebrick', linewidth=3, label='Churned'
    )
    sns.kdeplot(
        tenure_stayed, color='dodgerblue', linewidth=3, label='Stayed'
    )
    plt.title('Tenure Variable')
    plt.xlabel('Number of months the customer is with the company')
    plt.legend(loc='upper right')
    plt.grid(True)
    plt.show()
# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
seed = 1
X_train, X_test, y_train, y_test = train_test_split(
    data.drop("Churn", axis=1),  # Features
    data["Churn"],  # Target variable
    test_size=0.2,
    random_state=seed,
)

# The H2O estimator is trained from a single frame that holds both the
# predictors and the response, so the target is attached again as the last
# column.  `assign` returns a new DataFrame rather than writing into the
# object handed back by train_test_split, which avoids pandas'
# SettingWithCopyWarning while producing the same columns in the same order.
X_train = X_train.assign(Churn=y_train)
X_test = X_test.assign(Churn=y_test)

# Convert the pandas DataFrames into H2OFrames for training and scoring.
h2o_train = h2o.H2OFrame(X_train)
h2o_test = h2o.H2OFrame(X_test)

# Make the response explicitly categorical so H2O treats the task as
# classification (this is a no-op when the column was already parsed as enum).
h2o_train["Churn"] = h2o_train["Churn"].asfactor()
h2o_test["Churn"] = h2o_test["Churn"].asfactor()
# Shallow baseline network: two hidden layers with two Rectifier units each.
shallow_params = dict(
    hidden=[2, 2],
    activation="Rectifier",
    loss="CrossEntropy",
    # Fixed learning rate: the adaptive rate is switched off, so `rate` applies.
    adaptive_rate=False,
    rate=0.01,
    # Run the whole epoch budget: stopping_rounds=0 and classification_stop=-1
    # switch the early-stopping criteria off.
    epochs=10000,
    stopping_rounds=0,
    classification_stop=-1,
    balance_classes=False,
    # Score after every iteration so the scoring history has one row per step.
    score_each_iteration=True,
)
dl_model = H2ODeepLearningEstimator(**shallow_params)

# Predictors are the first 19 columns of the training frame; "Churn" is the
# response.
shallow_predictors = h2o_train.columns[:19]
dl_model.train(x=shallow_predictors, y="Churn", training_frame=h2o_train)

# The training-loss curve is switched off; set the flag to True to draw it.
SHOW_SHALLOW_LOSS_CURVE = False
if SHOW_SHALLOW_LOSS_CURVE:
    shallow_history = dl_model.scoring_history()
    plt.plot(shallow_history.epochs, shallow_history.training_logloss)
    plt.xlabel('Epochs')
    plt.ylabel('log_loss')
    plt.grid()
    plt.show()
# Score the held-out frame and bring the predicted labels and the test data
# back into pandas for evaluation.
prediction = dl_model.predict(h2o_test)
prediction = prediction["predict"].as_data_frame()
h2o_test_df = h2o_test.as_data_frame()

# Accuracy: percentage of test rows whose predicted label equals the true one.
accuracy = (prediction["predict"] == h2o_test_df["Churn"]).mean() * 100
print(f"Accuracy: {accuracy:.2f}%")

# Confusion matrix with predicted labels on the rows and actual labels on the
# columns.
conf_matrix = pd.crosstab(
    prediction["predict"],
    h2o_test_df["Churn"],
    rownames=["Predicted"],
    colnames=["Actual"],
)

# crosstab only emits labels that actually occur, so a model that predicts a
# single class yields a non-square table whose diagonal no longer lines up
# with the per-class column totals.  Reindex both axes to the union of labels
# (missing cells become 0) so the diagonal always holds the correct counts.
all_labels = conf_matrix.index.union(conf_matrix.columns)
conf_matrix = conf_matrix.reindex(
    index=all_labels, columns=all_labels, fill_value=0
).rename_axis(index="Predicted", columns="Actual")

# Per-class percentage of actual samples that were predicted correctly
# (NaN for a label that never occurs among the actual values).
conf_matrix_diag = conf_matrix.values.diagonal() / conf_matrix.sum(axis=0) * 100

print("Confusion Matrix:")
print(conf_matrix)
print("Diagonal Percentages:")
print(conf_matrix_diag)
print("-------------- DEEPER --------------")

# Deeper network: three hidden layers of ten Rectifier units each.
# NOTE: despite the variable name, balance_classes is False for this model.
deep_params = dict(
    hidden=[10, 10, 10],
    activation="Rectifier",
    loss="CrossEntropy",
    # Fixed learning rate: the adaptive rate is switched off, so `rate` applies.
    adaptive_rate=False,
    rate=0.01,
    # Run the whole epoch budget: stopping_rounds=0 and classification_stop=-1
    # switch the early-stopping criteria off.
    epochs=10000,
    stopping_rounds=0,
    classification_stop=-1,
    balance_classes=False,
    # Score after every iteration so the scoring history has one row per step.
    score_each_iteration=True,
)
dl_model_balanced = H2ODeepLearningEstimator(**deep_params)

# Predictors are the first 19 columns of the training frame; "Churn" is the
# response.
deep_predictors = h2o_train.columns[:19]
dl_model_balanced.train(x=deep_predictors, y="Churn", training_frame=h2o_train)

# Training log-loss against epochs for the deeper model.
# NOTE(review): no plt.show() is visible in this section, so the figure is
# only drawn, not displayed -- confirm whether it is shown further down.
deep_history = dl_model_balanced.scoring_history()
plt.plot(deep_history.epochs, deep_history.training_logloss)
plt.xlabel('Epochs')
plt.ylabel('log_loss')
plt.grid()

# Score the held-out frame and pull labels and test data back into pandas.
deep_scored = dl_model_balanced.predict(h2o_test)
prediction = deep_scored["predict"].as_data_frame()
h2o_test_df = h2o_test.as_data_frame()

# Accuracy: percentage of test rows whose predicted label equals the true one.
label_matches = prediction['predict'] == h2o_test_df["Churn"]
accuracy = label_matches.mean() * 100
print(f"Accuracy: {accuracy:.2f}%")