pygrad/pygrad/tensor.py

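"""A minimal NumPy-backed autograd Tensor.

Each operation returns a new Tensor, records its parent tensors, and attaches a
closure that maps the upstream gradient to the gradients of those parents;
``backward()`` then walks the graph recursively to fill in ``grad``.
"""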
import numpy as np


def tensor(*args, **kwargs):
    return Tensor(*args, **kwargs)


class Tensor:
    # TODO Implement 'requires_grad' functionality.
    def __init__(self, value):
        # TODO Add support for scalar values.
        if isinstance(value, list):
            value = np.array(value)
        if not isinstance(value, np.ndarray):
            raise TypeError(f"{type(value)} is not compatible with {np.ndarray}")
        self.value = value
        self.grad = np.zeros_like(value)
        # Required for backprop.
        self._parents = None
        self._back = None

    # uwu literally the only place where I have type annotations
    def __repr__(self) -> str:
        return f"Tensor(value={self.value}, grad={self.grad})"

    # Save parent tensors for the backward pass.
    def _save(self, *args):
        self._parents = args

    # TODO Maybe refactor the functions system? Maybe something like pytorch/tinygrad?
    def add(self, other):
        # Element-wise addition.
        tensor = Tensor(np.add(self.value, other.value))
        tensor._save(self, other)

        def back(upstream):
            # d(a + b)/da = d(a + b)/db = 1, so both parents receive the upstream gradient unchanged.
            return upstream, upstream
        tensor._back = back
        return tensor

    def sub(self, other):
        # Element-wise subtraction.
        tensor = Tensor(np.subtract(self.value, other.value))
        tensor._save(self, other)

        def back(upstream):
            # d(a - b)/da = 1, d(a - b)/db = -1.
            return upstream, -upstream
        tensor._back = back
        return tensor

    def mul(self, other):
        # Matrix multiplication (np.dot), not element-wise multiply.
        tensor = Tensor(np.dot(self.value, other.value))
        tensor._save(self, other)

        def back(upstream):
            # For y = a @ b: dL/da = upstream @ b.T, dL/db = a.T @ upstream.
            a, b = tensor._parents
            return np.dot(upstream, b.value.T), np.dot(a.value.T, upstream)
        tensor._back = back
        return tensor

    def div(self, other):
        # Element-wise division.
        tensor = Tensor(self.value / other.value)
        tensor._save(self, other)

        def back(upstream):
            # d(a / b)/da = 1 / b, d(a / b)/db = -a / b**2.
            a, b = tensor._parents
            return upstream / b.value, -upstream * a.value / (b.value ** 2)
        tensor._back = back
        return tensor

    def neg(self):
        tensor = Tensor(-self.value)
        tensor._save(self)

        def back(upstream):
            # d(-a)/da = -1.
            return [-upstream]
        tensor._back = back
        return tensor

    def expt(self, exponent):
        # Element-wise power with a constant (non-Tensor) exponent.
        tensor = Tensor(self.value ** exponent)
        tensor._save(self)

        def back(upstream):
            # d(a**n)/da = n * a**(n - 1).
            a, = tensor._parents
            return [exponent * (a.value ** (exponent - 1)) * upstream]
        tensor._back = back
        return tensor

    def reciprocal(self):
        tensor = Tensor(1.0 / self.value)
        tensor._save(self)

        def back(upstream):
            # d(1 / a)/da = -1 / a**2.
            a, = tensor._parents
            return [-upstream / (a.value ** 2)]
        tensor._back = back
        return tensor

    def exp(self):
        tensor = Tensor(np.exp(self.value))
        tensor._save(self)

        def back(upstream):
            # d(exp(a))/da = exp(a).
            a, = tensor._parents
            return [np.exp(a.value) * upstream]
        tensor._back = back
        return tensor

    def log(self):
        tensor = Tensor(np.log(self.value))
        tensor._save(self)

        def back(upstream):
            # d(log(a))/da = 1 / a.
            a, = tensor._parents
            return [upstream / a.value]
        tensor._back = back
        return tensor

    def tanh(self):
        tensor = Tensor(np.tanh(self.value))
        tensor._save(self)

        def back(upstream):
            # dtanh(a)/da = 1 - tanh(a)**2.
            a, = tensor._parents
            return [(1 - np.tanh(a.value) ** 2) * upstream]
        tensor._back = back
        return tensor

    # TODO Compute gradients only for tensors that need it.
    def _backprop(self, upstream):
        # Backprop through the tensor iff it has any parents.
        if self._parents is not None:
            for node, grad in zip(self._parents, self._back(upstream)):
                # Set the node gradient to the computed gradient.
                node.grad = grad
                # Recurse into the parent node.
                node._backprop(grad)

    def backward(self):
        # Partial of self with respect to self is ALWAYS 1.
        self.grad = np.ones_like(self.value)
        self._backprop(self.grad)
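

# Illustrative usage sketch (not part of the original module): builds a tiny
# graph with the ops above and prints the gradients that backward() fills in.
# The input values here are made up for demonstration only.
if __name__ == "__main__":
    a = tensor([[1.0, 2.0], [3.0, 4.0]])
    b = tensor([[5.0, 6.0], [7.0, 8.0]])

    # c = tanh(a @ b); backward() seeds dc/dc = 1 and propagates to a and b.
    c = a.mul(b).tanh()
    c.backward()
    print(c)
    print(a.grad)  # dL/da = (upstream * (1 - tanh(a @ b)**2)) @ b.T
    print(b.grad)  # dL/db = a.T @ (upstream * (1 - tanh(a @ b)**2))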