# bcd.py

# -*- coding: utf-8 -*-
"""breastcancerdetector.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1m730UYTcz66HXn4tAVbuWPtOEqgi3Nh4
"""

# Importing the dependencies

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Data collection and processing

# Load the breast-cancer dataset bundled with scikit-learn.
breast_cancer_dataset = sklearn.datasets.load_breast_cancer()

print(breast_cancer_dataset)

# Put the feature matrix into a DataFrame, one column per named feature.
data_frame = pd.DataFrame(
    breast_cancer_dataset.data,
    columns=breast_cancer_dataset.feature_names,
)

# NOTE: the exploration steps below were bare notebook expressions; outside a
# notebook a bare expression displays nothing, so each one is printed
# explicitly to keep the script's output equivalent to the notebook's.

# First 5 rows of the DataFrame.
print(data_frame.head())

# Add the target as a 'label' column.
data_frame['label'] = breast_cancer_dataset.target

# Last 5 rows of the DataFrame (now including 'label').
print(data_frame.tail())

# (rows, columns)
print(data_frame.shape)

# Column dtypes and non-null counts; info() writes to stdout itself.
data_frame.info()

# Number of missing values per column.
print(data_frame.isnull().sum())

# Statistical measures about the data.
print(data_frame.describe())

# Distribution of the target variable.
# Label meaning:
#   1 ---> benign (less risky)
#   0 ---> malignant (more risky)
print(data_frame['label'].value_counts())

# Per-class mean of every feature.
print(data_frame.groupby('label').mean())

"""seperating the features and target"""

# Feature matrix: every column except the target.
X = data_frame.drop(columns=['label'])
# Target vector: the 'label' column on its own.
Y = data_frame['label']

print(X, Y, sep='\n\n')

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

# Shapes of the full, training and test feature matrices.
print(*(features.shape for features in (X, X_train, X_test)))

"""model training

Logistic regression
"""

# The features are not scaled at this point, and with the default
# max_iter=100 the solver typically stops before converging on this dataset
# (scikit-learn reports a ConvergenceWarning). Raising the iteration budget
# is the documented remedy and lets the optimizer run to convergence.
model = LogisticRegression(max_iter=10000)

model.fit(X_train, Y_train)

# Accuracy on the data the model was trained on.
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

print('Accuracy on taining data= ', training_data_accuracy)

# Accuracy on the held-out test data.
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

print('Accuracy on test data= ', test_data_accuracy)

# Building a predictive system: classify a single sample with the
# logistic-regression model.

input_data = (13.54,14.36,87.46,566.3,0.09779,0.08129,0.06664,0.04781,0.1885,0.05766,0.2699,0.7886,2.058,23.56,0.008462,0.0146,0.02387,0.01315,0.0198,0.0023,15.11,19.26,99.7,711.2,0.144,0.1773,0.239,0.1288,0.2977,0.07259)

# Convert the sample to a numpy array.
input_data_as_numpy_array = np.asarray(input_data)

# Reshape to (1, n_features): we are predicting for one data point.
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# The model was fitted on a DataFrame, so hand it a one-row DataFrame with the
# same column names; predicting on a bare ndarray makes scikit-learn warn
# that the input has no valid feature names.
input_data_frame = pd.DataFrame(input_data_reshaped, columns=X.columns)

prediction = model.predict(input_data_frame)
print(prediction)

# Label 0 is malignant, label 1 is benign.
if prediction[0] == 0:
    print('The Breast cancer is Malignant')
else:
    print('The Breast cancer is Benign')

# Importing tensorflow / keras and the scaler used for the neural network.
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler

# Fix the TensorFlow seed so weight initialisation is repeatable.
tf.random.set_seed(3)

# Standardize the data BEFORE training. The network is evaluated and used for
# prediction on standardized inputs further down, so it must be trained on
# standardized inputs as well. The scaler is fitted on the training split
# only and then applied to the test split, so no test statistics leak into
# training.
scaler = StandardScaler()

X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)

# Raw (unscaled) feature table, printed for reference.
print(X)

# Setting up the layers of the neural network:
# 30 input features -> 20 hidden units -> 2 output units (one per class).
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(30,)),
    keras.layers.Dense(20, activation='relu'),
    keras.layers.Dense(2, activation='sigmoid'),
])

# Compiling the neural network.
# 'sparse_categorical_crossentropy' takes integer class labels rather than
# one-hot vectors. Illustration with three classes:
#   integer labels:   dog --> 0,     cat --> 1,     horse --> 2
#   one-hot vectors:  dog - [1 0 0], cat - [0 1 0], horse - [0 0 1]
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Training the neural network on the standardized training data; 10% of it is
# held back by Keras as validation data.
history = model.fit(X_train_std, Y_train, validation_split=0.1, epochs=10)

# Accuracy per epoch. A dedicated figure keeps these curves from sharing axes
# with the loss curves below (in a script, consecutive plt.plot calls draw
# onto the same current figure unless a new one is created).
plt.figure()
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])

plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')

plt.legend(['training data', 'validattion data'], loc = 'center')

# Loss per epoch, on its own figure.
plt.figure()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])

plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')

plt.legend(['training data', 'validattion data'], loc = 'upper right')

# Outside a notebook nothing is rendered until show() is called.
plt.show()

"""accuracy of the model on test data"""

# Score the network on the standardized test split and report its accuracy.
loss, accuracy = model.evaluate(X_test_std, Y_test)
print(accuracy)

# Shape of the standardized test matrix, followed by its first row.
print(X_test_std.shape, X_test_std[0], sep='\n')

# One row of per-class scores for every test sample.
Y_pred = model.predict(X_test_std)

# Shape and first row of the predictions, then both full arrays.
print(Y_pred.shape, Y_pred[0], X_test_std, Y_pred, sep='\n')

# Small argmax illustration: it returns the index of the largest value.
my_list = [10, 20, 30]
index_of_max_value = np.argmax(my_list)
print(my_list, index_of_max_value, sep='\n')

# Turn each row of class scores into a class label by taking the index of the
# highest score.
Y_pred_levels = list(map(np.argmax, Y_pred))
print(Y_pred_levels)

# Predictive system for the neural network: classify a single sample.

# One sample with all 30 feature values (the same sample that is classified
# with the logistic-regression model earlier in this script). The tuple was
# previously empty, which made scaler.transform fail because it received an
# array with zero features.
input_data = (13.54,14.36,87.46,566.3,0.09779,0.08129,0.06664,0.04781,0.1885,0.05766,0.2699,0.7886,2.058,23.56,0.008462,0.0146,0.02387,0.01315,0.0198,0.0023,15.11,19.26,99.7,711.2,0.144,0.1773,0.239,0.1288,0.2977,0.07259)

# Change the input data into a numpy array.
input_data_as_numpy_array = np.asarray(input_data)

# Reshape to (1, n_features): we are predicting for one data point.
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# Standardize the sample with the scaler fitted on the training data. The
# scaler was fitted on a DataFrame, so pass matching column names to avoid
# scikit-learn's missing-feature-names warning.
input_data_std = scaler.transform(
    pd.DataFrame(input_data_reshaped, columns=X.columns)
)

prediction = model.predict(input_data_std)
print(prediction)

# The predicted class is the index of the larger of the two output scores.
prediction_label = [np.argmax(prediction)]
print(prediction_label)

# Label 0 is malignant, label 1 is benign.
if prediction_label[0] == 0:
    print('The tumour is Malignant')
else:
    print('The Tumour is Benign')