From a75365a1ab13e22a3e6a465b74139ee023bcef70 Mon Sep 17 00:00:00 2001
From: aodhneine
Date: Sun, 15 Nov 2020 18:59:06 +0000
Subject: [PATCH] Refactor into more traditional project structure

Splits nn.py into pygrad/nn.py and pygrad/tensor.py.
---
 nn.py            | 146 -----------------------------------------------
 pygrad/nn.py     |  61 ++++++++++++++++++++
 pygrad/tensor.py |  93 ++++++++++++++++++++++++++++++
 3 files changed, 154 insertions(+), 146 deletions(-)
 delete mode 100644 nn.py
 create mode 100644 pygrad/nn.py
 create mode 100644 pygrad/tensor.py

diff --git a/nn.py b/nn.py
deleted file mode 100644
index 3078b53..0000000
--- a/nn.py
+++ /dev/null
@@ -1,146 +0,0 @@
-# neural networks from scratch with numpy
-
-import numpy as np
-
-def mean_absolute_error(x, y):
-    return np.mean(np.abs(x - y))
-
-def mean_squared_error(x, y):
-    return np.mean(np.power(x - y, 2))
-
-def cross_entropy_loss(x, y):
-    return -np.log(np.exp(y) / np.sum(np.exp(x)))
-
-# preapre inputs and outputs
-x = np.array([[1, 0]])
-y = np.array([[1]])
-
-# we're doing xavier initialisation - see
-w1 = np.random.randn(2, 3) / np.sqrt(2)
-w2 = np.random.randn(3, 1) / np.sqrt(3)
-
-def single_pass():
-    # forward pass
-    h = np.matmul(x, w1)
-    h_hat = np.tanh(h)
-    j = np.matmul(h_hat, w2)
-    print("prediction {}".format(j))
-
-    # loss calculation
-    loss = cross_entropy_loss(j, y)
-    print("loss {}".format(loss))
-
-    # TODO Backward pass.
-    return
-
-# initialise layers
-# self.lin1 = nn.Linear(2, 3)
-# self.lin2 = nn.Linear(3, 1)
-# self.loss = nn.MSELoss()
-# and then
-# x = self.lin1(x)
-# x = F.relu(x)
-# x = self.lin2(x)
-# x = F.softmax(x)
-# loss = self.loss(x, y)
-
-# TODO Add support for numpy arrays.
-class Tensor:
-    # TODO Implement 'requires_grad' functionality.
-    def __init__(self, value):
-        self.value = value
-        self.grad = 0
-        # Required for backprop.
-        self._parents = None
-        self._back = None
-
-    # uwu literally the only place where I have type annotations
-    def __repr__(self) -> str:
-        return f"Tensor(value={self.value}, grad={self.grad})"
-
-    # Save values for the backward pass.
-    def _save(self, *args):
-        self._parents = args
-
-    # TODO Maybe refactor the functions system? Maybe something like pytorch/tinygrad?
-    def add(self, other):
-        tensor = Tensor(self.value + other.value)
-        tensor._save(self, other)
-
-        def back(upstream):
-            return upstream * 1, upstream * 1
-
-        tensor._back = back
-        return tensor
-
-    def mul(self, other):
-        tensor = Tensor(self.value * other.value)
-        tensor._save(self, other)
-
-        def back(upstream):
-            a, b = tensor._parents
-            return upstream * b.value, upstream * a.value
-
-        tensor._back = back
-        return tensor
-
-    def expt(self, exponent):
-        tensor = Tensor(self.value ** exponent)
-        tensor._save(self)
-
-        def back(upstream):
-            a, = tensor._parents
-            return [ upstream * exponent * (a.value ** (exponent - 1)) ]
-
-        tensor._back = back
-        return tensor
-
-    def reciprocal(self):
-        tensor = Tensor(1.0 / self.value)
-        tensor._save(self)
-
-        def back(upstream):
-            a, = tensor._parents
-            return [ -1.0 / (a.value ** 2) ]
-
-        tensor._back = back
-        return tensor
-
-    def exp(self):
-        tensor = Tensor(np.exp(self.value))
-        tensor._save(self)
-
-        def back(upstream):
-            a, = tensor._parents
-            return [ np.exp(a.value) ]
-
-        tensor._back = back
-        return tensor
-
-    # TODO Compute gradients only for tensors that need it.
-    def _backprop(tensor, upstream):
-        # Backprop through the tensor iff it has any parents.
-        if tensor._parents is not None:
-            for node, grad in zip(tensor._parents, tensor._back(upstream)):
-                # Set the node gradient to the computed gradient.
-                node.grad = grad
-                # Iterate through all (possible) parent nodes of this node.
-                node._backprop(grad)
-
-    def backward(self):
-        # Partial of self with respect to self is ALWAYS 1.
-        self.grad = 1
-        Tensor._backprop(self, self.grad)
-
-# Small test to see if autograd works.
-def test():
-    # Forward pass.
-    x, y, z = Tensor(-2), Tensor(5), Tensor(-4)
-    q = x.add(y)
-    h = q.expt(2)
-    w = h.mul(z)
-    print(f"q = {q}, w = {w}")
-
-    # Backward pass.
-    w.backward()
-    print(f"is: dw = {w.grad}, dz = {z.grad}, dy = {y.grad}, dx = {x.grad}")
diff --git a/pygrad/nn.py b/pygrad/nn.py
new file mode 100644
index 0000000..d17a42a
--- /dev/null
+++ b/pygrad/nn.py
@@ -0,0 +1,61 @@
+# Neural networks from scratch with numpy.
+
+import numpy as np
+import pygrad.tensor as tensor
+
+def mean_absolute_error(x, y):
+    return np.mean(np.abs(x - y))
+
+def mean_squared_error(x, y):
+    return np.mean(np.power(x - y, 2))
+
+def cross_entropy_loss(x, y):
+    return -np.log(np.exp(y) / np.sum(np.exp(x)))
+
+# prepare inputs and outputs
+x = np.array([[1, 0]])
+y = np.array([[1]])
+
+# we're doing xavier initialisation - see
+w1 = np.random.randn(2, 3) / np.sqrt(2)
+w2 = np.random.randn(3, 1) / np.sqrt(3)
+
+def single_pass():
+    # forward pass
+    h = np.matmul(x, w1)
+    h_hat = np.tanh(h)
+    j = np.matmul(h_hat, w2)
+    print("prediction {}".format(j))
+
+    # loss calculation
+    loss = cross_entropy_loss(j, y)
+    print("loss {}".format(loss))
+
+    # TODO Backward pass.
+    return
+
+# initialise layers
+# self.lin1 = nn.Linear(2, 3)
+# self.lin2 = nn.Linear(3, 1)
+# self.loss = nn.MSELoss()
+# and then
+# x = self.lin1(x)
+# x = F.relu(x)
+# x = self.lin2(x)
+# x = F.softmax(x)
+# loss = self.loss(x, y)
+
+# Small test to see if autograd works.
+def test():
+    # Input tensors. Tensor now expects numpy arrays, so wrap the raw scalars.
+    x, y, z = tensor.Tensor(np.array(-2.0)), tensor.Tensor(np.array(5.0)), tensor.Tensor(np.array(-4.0))
+
+    # Forward pass.
+    q = x.add(y)
+    h = q.expt(2)
+    w = h.mul(z)
+    print(f"q = {q}, w = {w}")
+
+    # Backward pass.
+    w.backward()
+    print(f"is: dw = {w.grad}, dz = {z.grad}, dy = {y.grad}, dx = {x.grad}")
diff --git a/pygrad/tensor.py b/pygrad/tensor.py
new file mode 100644
index 0000000..d71a3e2
--- /dev/null
+++ b/pygrad/tensor.py
@@ -0,0 +1,93 @@
+import numpy as np
+
+class Tensor:
+    # TODO Implement 'requires_grad' functionality.
+    def __init__(self, value):
+        if not isinstance(value, np.ndarray):
+            print(f"{type(value)} is not compatible with {np.ndarray}")
+            exit(-1)
+
+        self.value = value
+        self.grad = np.zeros_like(value)
+        # Required for backprop.
+        self._parents = None
+        self._back = None
+
+    # uwu literally the only place where I have type annotations
+    def __repr__(self) -> str:
+        return f"Tensor(value={self.value}, grad={self.grad})"
+
+    # Save values for the backward pass.
+    def _save(self, *args):
+        self._parents = args
+
+    # TODO Maybe refactor the functions system? Maybe something like pytorch/tinygrad?
+    def add(self, other):
+        tensor = Tensor(np.add(self.value, other.value))
+        tensor._save(self, other)
+
+        def back(upstream):
+            return upstream, upstream  # d(a + b)/da = d(a + b)/db = 1.
+
+        tensor._back = back
+        return tensor
+
+    def mul(self, other):
+        tensor = Tensor(np.dot(self.value, other.value))
+        tensor._save(self, other)
+
+        def back(upstream):
+            a, b = tensor._parents
+            return np.dot(upstream, b.value.T), np.dot(a.value.T, upstream)  # Matrix product gradients.
+
+        tensor._back = back
+        return tensor
+
+    def expt(self, exponent):
+        tensor = Tensor(self.value ** exponent)
+        tensor._save(self)
+
+        def back(upstream):
+            a, = tensor._parents
+            return [upstream * exponent * (a.value ** (exponent - 1))]
+
+        tensor._back = back
+        return tensor
+
+    def reciprocal(self):
+        tensor = Tensor(1.0 / self.value)
+        tensor._save(self)
+
+        def back(upstream):
+            a, = tensor._parents
+            return [-upstream / (a.value ** 2)]
+
+        tensor._back = back
+        return tensor
+
+    def exp(self):
+        tensor = Tensor(np.exp(self.value))
+        tensor._save(self)
+
+        def back(upstream):
+            a, = tensor._parents
+            return [upstream * np.exp(a.value)]
+
+        tensor._back = back
+        return tensor
+
+    # TODO Compute gradients only for tensors that need it.
+    def _backprop(self, upstream):
+        print(upstream)
+        # Backprop through the tensor iff it has any parents.
+        if self._parents is not None:
+            for node, grad in zip(self._parents, self._back(upstream)):
+                # Set the node gradient to the computed gradient.
+                node.grad = grad
+                # Iterate through all (possible) parent nodes of this node.
+                node._backprop(grad)
+
+    def backward(self):
+        # Partial of self with respect to self is ALWAYS 1.
+        self.grad = np.ones_like(self.value)
+        self._backprop(self.grad)
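
A quick sanity check for the refactored module, for anyone reviewing this patch. This is only a sketch and not part of the patch itself: the file name check_grads.py is made up, and it assumes the repository root is on sys.path (the patch adds no pygrad/__init__.py, so importing pygrad.tensor relies on Python 3 namespace packages). It builds the same graph as test() in pygrad/nn.py and compares the autograd results against hand-derived gradients of w = (x + y)**2 * z.

# check_grads.py -- hypothetical helper, not included in this patch.
import numpy as np
import pygrad.tensor as tensor

# Build the graph with explicit numpy scalars, as the new Tensor requires.
x, y, z = (tensor.Tensor(np.array(v)) for v in (-2.0, 5.0, -4.0))
q = x.add(y)    # q = x + y
h = q.expt(2)   # h = q ** 2
w = h.mul(z)    # w = h * z (np.dot of 0-d arrays is plain multiplication)
w.backward()

# Analytic derivatives: dw/dx = dw/dy = 2 * (x + y) * z, dw/dz = (x + y) ** 2.
assert np.allclose(x.grad, 2 * (-2.0 + 5.0) * -4.0)  # -24
assert np.allclose(y.grad, 2 * (-2.0 + 5.0) * -4.0)  # -24
assert np.allclose(z.grad, (-2.0 + 5.0) ** 2)        # 9
print("autograd gradients match the analytic ones")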