"""Train and compare two H2O deep-learning classifiers on customer churn data.

The script reads ``customer_churn.csv``, holds out 20% of the rows as a test
set, trains a small network (hidden layers ``[2, 2]``) and a deeper one
(hidden layers ``[10, 10, 10]``), and prints the test accuracy of each.  For
the small network it also prints a confusion matrix and the percentage of
each actual class that was predicted correctly.
"""
import h2o
from h2o.estimators.deeplearning import H2ODeepLearningEstimator
from h2o.grid.grid_search import H2OGridSearch
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns

# The exploratory tenure plot and the loss curve of the small network were
# switched off in the original script (``if False:``); flip this to see them.
SHOW_OPTIONAL_PLOTS = False


def _build_classifier(hidden):
    """Return an untrained deep-learning classifier with the given layout.

    Args:
        hidden: List with the number of units in each hidden layer.

    Returns:
        An ``H2ODeepLearningEstimator`` configured identically for every
        model in this script apart from ``hidden``.
    """
    return H2ODeepLearningEstimator(
        activation="Rectifier",
        hidden=hidden,
        loss="CrossEntropy",
        score_each_iteration=True,
        epochs=10000,
        balance_classes=False,
        # Fixed learning rate instead of H2O's adaptive (ADADELTA) rate.
        rate=0.01,
        adaptive_rate=False,
        # Both early-stopping criteria are disabled so every model runs the
        # full number of epochs.
        stopping_rounds=0,
        classification_stop=-1,
    )


def _plot_training_logloss(model):
    """Draw the training log-loss of *model* against epochs (no ``show``)."""
    history = model.scoring_history()
    plt.plot(history["epochs"], history["training_logloss"])
    plt.xlabel('Epochs')
    plt.ylabel('log_loss')
    plt.grid()


def _predict_labels(model, frame):
    """Return the predicted class labels for *frame* as a pandas DataFrame."""
    return model.predict(frame)["predict"].as_data_frame()


def _accuracy_percent(predicted, actual):
    """Return the share of matching labels, in percent."""
    return (predicted == actual).mean() * 100


def _square_confusion_matrix(predicted, actual):
    """Cross-tabulate predictions against truth on a shared label set.

    ``pd.crosstab`` only emits labels that actually occur, so a model that
    never predicts some class yields a non-square table whose "diagonal" no
    longer lines up with the correct predictions.  Re-indexing both axes to
    the union of labels (filling with zero) keeps the table square.
    """
    table = pd.crosstab(
        predicted, actual, rownames=["Predicted"], colnames=["Actual"]
    )
    labels = list(table.index.union(table.columns))
    table = table.reindex(index=labels, columns=labels, fill_value=0)
    return table.rename_axis(index="Predicted", columns="Actual")


# Initialize H2O cluster
h2o.init(nthreads=-1, max_mem_size="2G")

# Load data
data = pd.read_csv("customer_churn.csv")

tenure_churned = data[data['Churn'] == 'Yes']['tenure']
tenure_stayed = data[data['Churn'] == 'No']['tenure']

if SHOW_OPTIONAL_PLOTS:
    plt.figure()
    sns.kdeplot(tenure_churned, color='firebrick', linewidth=3, label='Churned')
    sns.kdeplot(tenure_stayed, color='dodgerblue', linewidth=3, label='Stayed')
    plt.title('Tenure Variable')
    plt.xlabel('Number of months the customer is with the company')
    plt.legend(loc='upper right')
    plt.grid(True)
    plt.show()

# Split data.  Only the split is pinned by ``seed``; no seed is passed to the
# H2O estimators below.
seed = 1
X_train, X_test, y_train, y_test = train_test_split(
    data.drop("Churn", axis=1),  # Features
    data["Churn"],  # Target variable
    test_size=0.2,
    random_state=seed,
)

# Re-attach the target as the last column.  ``assign`` returns a new frame,
# which avoids writing into the objects handed back by ``train_test_split``.
X_train = X_train.assign(Churn=y_train)
X_test = X_test.assign(Churn=y_test)

h2o_train = h2o.H2OFrame(X_train)
h2o_test = h2o.H2OFrame(X_test)

# NOTE(review): the predictors are chosen by position (first 19 columns).
# Confirm this matches the intended feature set for customer_churn.csv,
# e.g. that it does not pick up an identifier column.
feature_columns = h2o_train.columns[0:19]

# Ground-truth labels, converted once and reused for both models.
h2o_test_df = h2o_test.as_data_frame()

# Training the neural network
dl_model = _build_classifier([2, 2])
dl_model.train(x=feature_columns, y="Churn", training_frame=h2o_train)

if SHOW_OPTIONAL_PLOTS:
    _plot_training_logloss(dl_model)
    plt.show()

# Prediction
prediction = _predict_labels(dl_model, h2o_test)

# Calculate accuracy
accuracy = _accuracy_percent(prediction['predict'], h2o_test_df["Churn"])
print(f"Accuracy: {accuracy:.2f}%")

conf_matrix = _square_confusion_matrix(
    prediction["predict"], h2o_test_df["Churn"]
)
# Correct predictions divided by the number of rows of each actual class.
conf_matrix_diag = (
    conf_matrix.values.diagonal() / conf_matrix.sum(axis=0) * 100
)

print("Confusion Matrix:")
print(conf_matrix)
print("Diagonal Percentages:")
print(conf_matrix_diag)

print("-------------- DEEPER --------------")

# Deeper model.  Despite the variable name, class balancing stays off
# (``balance_classes=False``); only the hidden layout differs.
dl_model_balanced = _build_classifier([10, 10, 10])
dl_model_balanced.train(x=feature_columns, y="Churn", training_frame=h2o_train)

_plot_training_logloss(dl_model_balanced)

# Prediction
prediction = _predict_labels(dl_model_balanced, h2o_test)

# Calculate accuracy
accuracy = _accuracy_percent(prediction['predict'], h2o_test_df["Churn"])
print(f"Accuracy: {accuracy:.2f}%")

# Display the loss curve drawn above; done last so the accuracy is printed
# before a (possibly blocking) plot window opens.
plt.show()