Predicting Heart Disease Risk with Machine Learning Models in Python¶
This project applies machine learning to build a predictive system for diagnosing heart disease from key medical features. Using a dataset of heart disease patients, it trains a Random Forest classifier to predict whether an individual is at risk of heart disease. The model is designed to assist healthcare professionals by providing data-driven predictions that can aid in early diagnosis and decision-making.
Key Features:¶
Data Preprocessing: The dataset undergoes preprocessing to handle missing values, encode categorical data, and scale numerical features to ensure high model accuracy.
Model Development: A Random Forest classifier is trained to predict heart disease risk, and model performance is evaluated using accuracy scores, classification reports, and confusion matrices.
Feature Importance Analysis: An analysis of the most significant features contributing to the model's predictions helps in understanding which medical parameters are most indicative of heart disease.
Prediction for New Data: The system can make predictions for new patients based on their medical records.
Visualization of Results: Prediction probabilities for "Heart Disease" and "No Heart Disease" are visualized with bar charts to provide a clear understanding of the model’s confidence.
Conclusion:¶
This Heart Disease Prediction System uses machine learning to provide an early warning for individuals at risk of heart disease, supporting timely intervention and better healthcare outcomes. With clear visualizations and an interpretable model, this system can be a valuable tool for both healthcare providers and researchers.
# Install Required Packages
!pip install pandas numpy matplotlib seaborn scikit-learn joblib
Requirement already satisfied: pandas, numpy, matplotlib, seaborn, scikit-learn, joblib (and their dependencies) in c:\users\omprakash\anaconda3\lib\site-packages
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
import joblib
# Load the Heart Disease dataset
heart_data = pd.read_csv("heart.csv")
# Display the first few rows of the dataset
print("First few rows of Heart Disease dataset:")
print(heart_data.head())
First few rows of Heart Disease dataset:
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  ca  thal  target
0   52    1   0       125   212    0        1      168      0      1.0      2   2     3       0
1   53    1   0       140   203    1        0      155      1      3.1      0   0     3       0
2   70    1   0       145   174    0        1      125      1      2.6      0   0     3       0
3   61    1   0       148   203    0        1      161      0      0.0      2   1     3       0
4   62    0   0       138   294    1        1      106      0      1.9      1   3     2       0
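Before preprocessing, it is worth confirming the dataset's size, checking for missing values, and looking at the class balance. A minimal sanity-check cell, assuming the same heart_data DataFrame loaded above:
# Quick sanity checks on the loaded data
print("Shape (rows, columns):", heart_data.shape)
print("\nMissing values per column:")
print(heart_data.isnull().sum())
print("\nTarget class balance (0: No heart disease, 1: Heart disease):")
print(heart_data['target'].value_counts())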
# Step 1: Data Preprocessing
# Handle missing values (if any)
heart_data.fillna(heart_data.mean(), inplace=True)
# Step 2: Encode the target variable 'target' (already binary in this dataset: 0 = no heart disease, 1 = heart disease);
# the fitted encoder also provides the class labels used for the confusion matrix display later
encoder = LabelEncoder()
heart_data['target'] = encoder.fit_transform(heart_data['target'])
# Step 3: Feature Scaling
# Separate features and target
X = heart_data.drop('target', axis=1) # Features
y = heart_data['target'] # Target
# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Step 4: Train-Test Split (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
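An optional variant of the split above is to stratify on the target so the train and test sets keep the same proportion of positive and negative cases; if used, this sketch would replace the cell above rather than follow it:
# Optional variant: stratified train-test split preserves the target class ratio in both sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)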
# Step 5: Train the Random Forest Classifier
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
RandomForestClassifier(random_state=42)
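The single train/test split gives one accuracy estimate; a quick k-fold cross-validation on the scaled features shows how stable that estimate is across different splits. A short optional check, not part of the original workflow:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation accuracy for the same model configuration
cv_scores = cross_val_score(RandomForestClassifier(random_state=42), X_scaled, y, cv=5)
print("Cross-validation accuracies:", np.round(cv_scores, 3))
print(f"Mean cross-validation accuracy: {cv_scores.mean():.3f}")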
# Step 6: Make Predictions
y_pred = rf_model.predict(X_test)
# Step 7: Evaluate the Model
# Accuracy
print(f"Accuracy of Random Forest Model: {accuracy_score(y_test, y_pred):.2f}")
Accuracy of Random Forest Model: 0.99
# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       102
           1       1.00      0.97      0.99       103

    accuracy                           0.99       205
   macro avg       0.99      0.99      0.99       205
weighted avg       0.99      0.99      0.99       205
# Confusion Matrix Visualization
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=encoder.classes_)
disp.plot(cmap="Blues")
plt.title("Confusion Matrix for Heart Disease Model")
plt.show()
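Since the Random Forest exposes class probabilities, the evaluation can also report a threshold-independent metric such as ROC AUC; this is a small optional addition to the evaluation step:
from sklearn.metrics import roc_auc_score

# ROC AUC computed from the predicted probability of the positive class (heart disease)
y_proba = rf_model.predict_proba(X_test)[:, 1]
print(f"ROC AUC of Random Forest Model: {roc_auc_score(y_test, y_proba):.2f}")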
# Step 8: Feature Importance Visualization
def plot_feature_importance(model, feature_names, title="Feature Importance"):
feature_importances = model.feature_importances_
sorted_idx = np.argsort(feature_importances)[::-1]
plt.figure(figsize=(10, 6))
sns.barplot(x=feature_importances[sorted_idx], y=feature_names[sorted_idx])
plt.title(title)
plt.xlabel('Importance')
plt.ylabel('Features')
plt.show()
# Plot Feature Importance
plot_feature_importance(rf_model, X.columns, "Feature Importance for Heart Disease Model")
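Impurity-based importances from tree ensembles can overstate correlated or high-cardinality features, so permutation importance on the held-out test set is a common cross-check. A sketch using scikit-learn's permutation_importance, not part of the original notebook:
from sklearn.inspection import permutation_importance

# Permutation importance: how much test accuracy drops when each feature is shuffled
perm = permutation_importance(rf_model, X_test, y_test, n_repeats=10, random_state=42)
for idx in np.argsort(perm.importances_mean)[::-1]:
    print(f"{X.columns[idx]}: {perm.importances_mean[idx]:.3f}")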
# Step 9: Save the Model and Scaler
joblib.dump(rf_model, "heart_disease_model.pkl")
joblib.dump(scaler, "scaler.pkl")
print("\nModel and Scaler saved!")
Model and Scaler saved!
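The prediction function below rebuilds feature_names from X.columns, which only exists if the preprocessing cells have been run in the same session; persisting the column order alongside the model avoids that dependency. A small optional addition (the filename here is chosen for illustration):
# Optional: save the expected feature order so a fresh session doesn't need the training DataFrame
joblib.dump(list(X.columns), "feature_names.pkl")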
# Step 10: Load the saved model and scaler for prediction
loaded_model = joblib.load("heart_disease_model.pkl")
loaded_scaler = joblib.load("scaler.pkl")
def predict_new_data(model, scaler, new_data, feature_names):
# Convert the new data to a DataFrame to align with the feature names
new_data_df = pd.DataFrame([new_data], columns=feature_names)
# Scale the new data
scaled_input = scaler.transform(new_data_df)
# Make the prediction
prediction = model.predict(scaled_input)
# Interpret the prediction (0: No heart disease, 1: Heart disease)
return "No Heart Disease" if prediction[0] == 0 else "Heart Disease"
# Use the feature names (column names of the Heart Disease dataset)
feature_names = X.columns.tolist()
# Example input data for a new patient (illustrative values, listed in the dataset's column order:
# age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal)
new_patient_data = [63, 1, 3, 145, 233, 1, 0, 150, 0, 2.3, 0, 0, 1]
# Make prediction for the new patient
prediction = predict_new_data(loaded_model, loaded_scaler, new_patient_data, feature_names)
print(f"Prediction for New Patient: {prediction}")
Prediction for New Patient: Heart Disease
import matplotlib.pyplot as plt
# Example new patient data for two cases (illustrative values, in the dataset's column order):
new_patient_with_heart_disease = [63, 1, 3, 145, 233, 1, 0, 150, 0, 2.3, 0, 0, 1]
new_patient_without_heart_disease = [52, 1, 0, 125, 212, 0, 1, 168, 0, 1.0, 2, 2, 3]
# Scale both records with the saved scaler before predicting, since the model was trained on scaled features
scaled_with = loaded_scaler.transform(pd.DataFrame([new_patient_with_heart_disease], columns=feature_names))
scaled_without = loaded_scaler.transform(pd.DataFrame([new_patient_without_heart_disease], columns=feature_names))
# Get class probabilities for both cases (index 0: no heart disease, index 1: heart disease)
prob_with_heart_disease = loaded_model.predict_proba(scaled_with)[0]
prob_without_heart_disease = loaded_model.predict_proba(scaled_without)[0]
# Create a bar plot to compare the two cases
labels = ['No Heart Disease', 'Heart Disease']
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
# Plot for the patient with heart disease
axes[0].bar(labels, prob_with_heart_disease, color=['blue', 'red'])
axes[0].set_title('Prediction Probability: Patient with Heart Disease')
axes[0].set_ylim(0, 1)
axes[0].set_xlabel('Prediction')
axes[0].set_ylabel('Probability')
# Plot for the patient without heart disease
axes[1].bar(labels, prob_without_heart_disease, color=['blue', 'red'])
axes[1].set_title('Prediction Probability: Patient without Heart Disease')
axes[1].set_ylim(0, 1)
axes[1].set_xlabel('Prediction')
axes[1].set_ylabel('Probability')
# Display the plots
plt.tight_layout()
plt.show()