Notes and References
Weights and Gradients Initialization:
Forward Propagation:
Output Activation:
Loss Calculation:
Backward Propagation and Weight Update:
import numpy as np
import matplotlib.pyplot as plt
class Perceptron:
    """Single threshold unit; ``weights[0]`` is the bias weight."""

    def __init__(self, input_size, learning_rate=0.01):
        """Create a unit with ``input_size`` inputs plus one bias weight.

        Weights are initialised with ``np.random.rand`` (uniform on [0, 1)).
        """
        self.weights = np.random.rand(input_size + 1)
        self.learning_rate = learning_rate

    def activation_function(self, x):
        """Step function: 1 for a non-negative ``x``, otherwise 0."""
        if x >= 0:
            return 1
        return 0

    def predict(self, inputs):
        """Return the 0/1 output of the unit for one input vector."""
        # A constant 1 is prepended so that weights[0] acts as the bias.
        augmented = np.insert(inputs, 0, 1)
        return self.activation_function(np.dot(self.weights, augmented))

    def train(self, inputs, target):
        """Apply one perceptron-rule update towards ``target`` (0 or 1)."""
        delta = target - self.predict(inputs)
        augmented = np.insert(inputs, 0, 1)
        self.weights += self.learning_rate * delta * augmented
class PerceptronLayer:
    """A group of independent perceptrons that all receive the same inputs."""

    def __init__(self, input_size, num_neurons, learning_rate=0.01):
        """Create ``num_neurons`` perceptrons, each with ``input_size`` inputs."""
        self.perceptrons = [Perceptron(input_size, learning_rate) for _ in range(num_neurons)]

    def predict(self, inputs):
        """Return an array holding each perceptron's output for ``inputs``."""
        return np.array([perceptron.predict(inputs) for perceptron in self.perceptrons])

    def train(self, inputs, target):
        """Train every perceptron on ``inputs`` towards its own target value.

        Args:
            inputs: Input vector shared by all perceptrons.
            target: One target per perceptron. A bare scalar is accepted for
                a single-neuron layer.

        Raises:
            ValueError: If the number of targets differs from the number of
                perceptrons.
        """
        # Scalars (e.g. a numpy integer label) are not iterable, so promote
        # them to a one-element array before pairing targets with neurons.
        targets = np.atleast_1d(target)
        if len(targets) != len(self.perceptrons):
            raise ValueError(
                f"expected {len(self.perceptrons)} target value(s), got {len(targets)}"
            )
        for perceptron, target_value in zip(self.perceptrons, targets):
            perceptron.train(inputs, target_value)
class NeuralNetwork:
    """Two stacked perceptron layers: a hidden layer feeding an output layer."""

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        """Build the hidden layer and the output layer that consumes it."""
        self.hidden_layer = PerceptronLayer(input_size, hidden_size, learning_rate)
        self.output_layer = PerceptronLayer(hidden_size, output_size, learning_rate)

    def train(self, inputs, target):
        """Run one training step for a single example.

        Only the output layer is trained, using the hidden layer's current
        outputs as its inputs; the hidden layer's weights are not modified
        here.
        """
        self.output_layer.train(self.hidden_layer.predict(inputs), target)

    def predict(self, inputs):
        """Feed ``inputs`` through both layers and return the output layer's result."""
        return self.output_layer.predict(self.hidden_layer.predict(inputs))
def plot_decision_boundary(network, x_range, y_range, data=None, labels=None):
    """Plot the network's predictions over a 100 x 100 grid with sample points on top.

    Args:
        network: Object whose ``predict(point)`` accepts a 2-element array and
            yields a single value per point (the results are reshaped to the
            grid, so a multi-output network will not fit).
        x_range: ``(min, max)`` pair for the horizontal axis.
        y_range: ``(min, max)`` pair for the vertical axis.
        data: Optional 2-D array of sample points; columns 0 and 1 are drawn.
        labels: Optional per-sample values used to colour the points.

    When ``data`` or ``labels`` is omitted, a module-level variable of the
    same name is used if one exists, which keeps existing three-argument
    calls working. If no data is available the scatter overlay is skipped
    instead of failing with ``NameError``.
    """
    xx, yy = np.meshgrid(
        np.linspace(x_range[0], x_range[1], 100),
        np.linspace(y_range[0], y_range[1], 100),
    )
    predictions = np.array(
        [network.predict(np.array([x, y])) for x, y in zip(xx.ravel(), yy.ravel())]
    ).reshape(xx.shape)
    plt.contourf(xx, yy, predictions, cmap=plt.cm.Spectral, alpha=0.8)

    # Legacy fallback: earlier callers relied on script-level globals.
    module_globals = globals()
    if data is None:
        data = module_globals.get("data")
    if labels is None:
        labels = module_globals.get("labels")
    if data is not None:
        plt.scatter(data[:, 0], data[:, 1], c=labels, cmap=plt.cm.Spectral)
    plt.show()
if __name__ == "__main__":
    # Example usage for the XOR problem.
    # `data` and `labels` stay module-level names because
    # plot_decision_boundary reads them when drawing the sample points.
    data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    labels = np.array([0, 1, 1, 0])
    input_size = 2
    hidden_size = 4
    output_size = 1
    nn = NeuralNetwork(input_size, hidden_size, output_size, learning_rate=0.1)

    # Train the neural network.
    for _ in range(10000):
        for example, label in zip(data, labels):
            # The output layer pairs one target with each of its neurons, so
            # the scalar label is wrapped in a one-element list; a bare numpy
            # integer is not iterable and would raise TypeError.
            nn.train(example, [label])

    # Plot decision boundary.
    plot_decision_boundary(nn, [0, 1], [0, 1])
A more complex example: classifying the Iris dataset with ReLU and softmax activations
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
class Perceptron:
    """Single ReLU unit; ``weights[0]`` is the bias weight."""

    def __init__(self, input_size, learning_rate=0.01):
        """Create a unit with ``input_size`` inputs plus one bias weight.

        Weights are initialised with ``np.random.rand`` (uniform on [0, 1)).
        """
        self.weights = np.random.rand(input_size + 1)
        self.learning_rate = learning_rate

    def relu(self, x):
        """Return ``max(0, x)`` element-wise."""
        return np.maximum(0, x)

    def softmax(self, x):
        """Return the softmax of ``x`` along its last axis.

        Kept for callers that use it directly. ``predict`` no longer applies
        it: the softmax of one unit's scalar output is always 1.0, so it only
        makes sense across the outputs of a whole layer.
        """
        exp_values = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return exp_values / np.sum(exp_values, axis=-1, keepdims=True)

    def net_input(self, inputs):
        """Return the weighted sum ``bias + w . inputs`` before activation."""
        # A constant 1 is prepended so that weights[0] acts as the bias.
        return np.dot(self.weights, np.insert(inputs, 0, 1))

    def predict(self, inputs):
        """Return the unit's ReLU activation for one input vector."""
        return self.relu(self.net_input(inputs))

    def update(self, inputs, error):
        """Move the weights by ``learning_rate * error * [1, *inputs]``."""
        self.weights += self.learning_rate * error * np.insert(inputs, 0, 1)

    def train(self, inputs, target):
        """Apply one delta-rule step pulling the activation towards ``target``."""
        self.update(inputs, target - self.predict(inputs))


class PerceptronLayer:
    """A group of perceptrons that all receive the same inputs."""

    def __init__(self, input_size, num_neurons, learning_rate=0.01):
        """Create ``num_neurons`` perceptrons, each with ``input_size`` inputs."""
        self.perceptrons = [Perceptron(input_size, learning_rate) for _ in range(num_neurons)]

    def net_input(self, inputs):
        """Return each perceptron's pre-activation weighted sum as an array."""
        return np.array([perceptron.net_input(inputs) for perceptron in self.perceptrons])

    def predict(self, inputs):
        """Return each perceptron's ReLU activation as an array."""
        return np.array([perceptron.predict(inputs) for perceptron in self.perceptrons])

    def update(self, inputs, errors):
        """Apply ``errors[i]`` as the error signal of perceptron ``i``."""
        for perceptron, error in zip(self.perceptrons, errors):
            perceptron.update(inputs, error)

    def train(self, inputs, target):
        """Train each perceptron independently towards its own target value.

        Args:
            inputs: Input vector shared by all perceptrons.
            target: One target per perceptron (a scalar is accepted for a
                single-neuron layer).

        Raises:
            ValueError: If the number of targets differs from the number of
                perceptrons.
        """
        targets = np.atleast_1d(target)
        if len(targets) != len(self.perceptrons):
            raise ValueError(
                f"expected {len(self.perceptrons)} target value(s), got {len(targets)}"
            )
        for perceptron, target_value in zip(self.perceptrons, targets):
            perceptron.train(inputs, target_value)


class NeuralNetwork:
    """One ReLU hidden layer followed by a softmax output layer."""

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        """Build the hidden layer and the output layer that consumes it."""
        self.hidden_layer = PerceptronLayer(input_size, hidden_size, learning_rate)
        self.output_layer = PerceptronLayer(hidden_size, output_size, learning_rate)

    @staticmethod
    def _softmax(logits):
        """Numerically stable softmax over the last axis of ``logits``."""
        shifted = np.exp(logits - np.max(logits, axis=-1, keepdims=True))
        return shifted / np.sum(shifted, axis=-1, keepdims=True)

    def predict(self, inputs):
        """Return class probabilities (non-negative, summing to 1) for ``inputs``."""
        hidden_output = self.hidden_layer.predict(inputs)
        # Softmax is taken across the output neurons' weighted sums, so the
        # probabilities depend on how the neurons compare with each other.
        return self._softmax(self.output_layer.net_input(hidden_output))

    def train(self, inputs, target):
        """Run one gradient step on the cross-entropy loss for a single example.

        Args:
            inputs: Feature vector of the example.
            target: One-hot vector with one entry per output neuron.
        """
        # Forward pass, keeping the hidden pre-activations for the ReLU mask.
        hidden_net = self.hidden_layer.net_input(inputs)
        hidden_output = np.maximum(0, hidden_net)
        probabilities = self._softmax(self.output_layer.net_input(hidden_output))

        # For softmax + cross-entropy the output error signal is target - p.
        output_error = np.asarray(target, dtype=float) - probabilities

        # Back-propagate through the output weights (bias column excluded)
        # BEFORE they are updated; ReLU passes gradient only where it fired.
        output_weights = np.array(
            [perceptron.weights[1:] for perceptron in self.output_layer.perceptrons]
        )
        hidden_error = np.dot(output_weights.T, output_error) * (hidden_net > 0)

        self.output_layer.update(hidden_output, output_error)
        self.hidden_layer.update(inputs, hidden_error)
def one_hot_encode(labels, num_classes):
    """Return a ``(len(labels), num_classes)`` float array of one-hot rows.

    Row ``i`` is all zeros except for a 1 in column ``labels[i]``.
    """
    sample_count = len(labels)
    encoded = np.zeros((sample_count, num_classes))
    row_index = np.arange(sample_count)
    # Paired row/column indexing sets exactly one entry per row.
    encoded[row_index, labels] = 1
    return encoded
def plot_decision_boundary(network, data, labels, title):
    """Draw the network's predicted class regions with the samples on top.

    The grid spans the range of the first two columns of ``data`` padded by 1
    on each side, sampled every 0.02. Each grid point is assigned the index
    of the largest entry of ``network.predict(point)``.
    """
    step = .02
    first, second = data[:, 0], data[:, 1]
    x_min = first.min() - 1
    x_max = first.max() + 1
    y_min = second.min() - 1
    y_max = second.max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))

    grid_outputs = np.array(
        [network.predict(np.array([x, y])) for x, y in zip(xx.ravel(), yy.ravel())]
    )
    regions = np.argmax(grid_outputs, axis=1).reshape(xx.shape)

    plt.contourf(xx, yy, regions, cmap=plt.cm.Spectral, alpha=0.8)
    plt.scatter(first, second, c=labels, cmap=plt.cm.Spectral)
    plt.title(title)
    plt.show()
def main():
    """Train the network on two Iris features, print test accuracy, and plot."""
    iris = datasets.load_iris()
    # Only the first two features are kept, for simplicity.
    features = iris.data[:, :2]
    labels = iris.target

    # Standardize the features.
    data = StandardScaler().fit_transform(features)

    # Hold out 20% of the samples for testing.
    X_train, X_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.2, random_state=42
    )

    num_classes = len(np.unique(labels))
    y_train_encoded = one_hot_encode(y_train, num_classes)

    nn = NeuralNetwork(X_train.shape[1], 8, num_classes, learning_rate=0.01)

    # Train one example at a time for 5000 passes over the training set.
    for _ in range(5000):
        for example, label in zip(X_train, y_train_encoded):
            nn.train(example, label)

    # The predicted class is the index of the largest network output.
    y_pred_encoded = np.array([nn.predict(example) for example in X_test])
    y_pred = np.argmax(y_pred_encoded, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy}")

    plot_decision_boundary(nn, X_test, y_test, "Decision Boundary for Iris Classification")
if __name__ == "__main__":
    # Run the Iris example only when this file is executed as a script.
    main()