Commit 590139d8 authored by Elif Ceylan

GROUPWORK_clean up and comment

parent 9415e3e8
# External
import torch
## Base class
## All the other classes in this file inherit from class Module
class Module(object):
def forward(self, *input):
raise NotImplementedError
@@ -16,53 +18,57 @@ class Module(object):
def zero_grad(self):
pass
# Modules to Implement:
## Linear
## ReLU
## LeakyReLU
## Tanh
## Sigmoid
## Sequential
## MSELoss
## CrossEntropyLoss
# Optimizer: SGD
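# --- Optimizer sketch (illustrative assumption; the real SGD lives outside this file) ---
# "Optimizer: SGD" above, together with the velocity_w / velocity_b buffers in
# Linear, suggests an SGD-with-momentum update. One possible shape of that
# update is sketched here as an assumption, writing the new tensors back via
# Linear.update_params(opt=True, ...); gradient scaling (e.g. by batch size)
# is left to the real optimizer.
def _sgd_momentum_step_sketch(layer, lr=0.01, momentum=0.9):
    if layer.velocity_w is None:  # lazily create the momentum buffers
        layer.velocity_w = torch.zeros(layer.w.size())
        layer.velocity_b = torch.zeros(layer.b.size())
    layer.velocity_w = momentum * layer.velocity_w - lr * layer.grad_w
    layer.velocity_b = momentum * layer.velocity_b - lr * layer.grad_b
    layer.update_params(opt=True, w=layer.w + layer.velocity_w, b=layer.b + layer.velocity_b)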
## Network framework
class Sequential(Module):
def __init__(self, modules) -> None:
super().__init__()
self.modules = modules
self.params = []
def forward(self, x):
# go through all the modules inside the self.modules list
for module in self.modules:
# call forward function of the respective module
# while passing input (x) as an argument
x = module.forward(x)
return x
def backward(self, gradwrtoutput):
# when doing backward, we reverse the self.modules list
# because we have to start from the last module
for module in reversed(self.modules):
# gradient wrt output update
gradwrtoutput = module.backward(gradwrtoutput)
# populate the self.params list for later use
if module.param() != [(None, None)]:
self.params = module.param()
return gradwrtoutput
def param(self):
# collect all the params and return them in a list
for module in self.modules:
self.params.append(module.param())
return self.params
def update_params(self, lr):
# call update_params of every module
for module in self.modules:
module.update_params(lr)
def zero_grad(self):
# call zero_grad of every module
for module in self.modules:
module.zero_grad()
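# --- Usage sketch for Sequential (illustrative only; never called in this file) ---
# forward feeds a batch through the modules in order, backward pushes the loss
# gradient through them in reverse, update_params applies a gradient-descent
# step. Layer sizes, batch size and learning rate below are arbitrary
# assumptions; the MSE gradient 2 * (y - t) is computed by hand so it has the
# same shape as the network output. Parameter-free modules are assumed to
# inherit a no-op update_params from the base Module (elided in this diff).
def _sequential_usage_sketch():
    model = Sequential([Linear(2, 25), ReLU(), Linear(25, 1)])
    x = torch.empty(10, 2).normal_()   # toy batch of 10 samples
    t = torch.empty(10, 1).normal_()   # toy targets
    model.zero_grad()                  # grad buffers start uninitialized
    y = model.forward(x)               # (10, 1) predictions
    model.backward(2 * (y - t))        # accumulate gradients in each Linear
    model.update_params(0.01)          # w = w - (lr / batch_size) * grad_w
    return y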
## Fully connected layers
class Linear(Module):
def __init__(self, input_size, output_size) -> None:
super().__init__()
# class members
# necessary for all network types
self.input_size = input_size
self.output_size = output_size
self.w = torch.empty(output_size, input_size).normal_()
@@ -70,53 +76,44 @@ class Linear(Module):
self.grad_w = torch.empty(self.w.size())
self.grad_b = torch.empty(self.b.size())
self.x = None
# velocity_w and velocity_b are buffers for the SGD optimizer (momentum)
self.velocity_w = None
self.velocity_b = None
def forward(self, input):
self.x = input
# linear transformation: y = x @ w^T + b
return torch.add(torch.matmul(self.x, self.w.t()), self.b)
def backward(self, gradwrtoutput):
# update grad_w and _b with gradwrtoutput
self.grad_w += gradwrtoutput.t().mm(self.x)
self.grad_b += gradwrtoutput.sum(dim=0)
# gradient of the linear transformation wrt the input: gradwrtoutput @ w
return gradwrtoutput.mm(self.w)
def param(self):
return [(self.w, self.grad_w), (self.b, self.grad_b)]
def update_params(self, lr=None, opt=False, w=None, b=None):
# if opt=True, w and b come precomputed from the optimizer
if opt:
self.w = w
self.b = b
# otherwise do a plain gradient-descent step, scaled by the batch size
else:
self.w = self.w - (lr/self.x.size(0))*self.grad_w # w = w - (lr/batch_size) * grad_w
self.b = self.b - (lr/self.x.size(0))*self.grad_b # b = b - (lr/batch_size) * grad_b
def zero_grad(self):
# set the accumulated gradients back to zero
self.grad_w.zero_()
self.grad_b.zero_()
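# --- Shape sketch for Linear (illustrative; sizes are assumptions) ---
# forward maps (batch, input_size) -> (batch, output_size); backward expects a
# gradient with that same output shape, accumulates grad_w / grad_b and returns
# the gradient wrt the input. With opt=True, update_params instead takes
# ready-made tensors from an external optimizer (not shown here).
def _linear_shape_sketch():
    layer = Linear(3, 4)
    layer.zero_grad()                        # grads are allocated uninitialized
    x = torch.empty(8, 3).normal_()          # batch of 8 inputs
    out = layer.forward(x)                   # (8, 4)
    grad_out = torch.empty(8, 4).normal_()   # pretend upstream gradient
    grad_in = layer.backward(grad_out)       # (8, 3); grad_w is (4, 3)
    layer.update_params(lr=0.1)              # w = w - (lr / 8) * grad_w
    return out, grad_in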
# sigma: tanh activation
class sigma(Module):
def __init__(self) -> None:
super().__init__()
self.x = None
def forward(self, input):
self.x = input
return torch.tanh(self.x)
def backward(self, gradwrtoutput):
# d/dx tanh(x) = 4 / (e^x + e^-x)^2 = 1 - tanh^2(x)
return (4 * (self.x.exp() + self.x.mul(-1).exp()).pow(-2)) * gradwrtoutput
def param(self):
return [(None, None)]
## Activation functions
# ReLU
class ReLU(Module):
def __init__(self) -> None:
super().__init__()
@@ -124,10 +121,15 @@ class ReLU(Module):
def forward(self, input):
self.x = input
# if x<=0 then 0
# if x>0 then x
self.x[self.x <= 0] = 0
return self.x
def backward(self, gradwrtoutput):
# gradient of ReLU
# if x<=0 then 0
# if x>0 then 1
self.x[self.x <= 0] = 0
self.x[self.x > 0] = 1
return self.x * gradwrtoutput
@@ -135,7 +137,7 @@ class ReLU(Module):
def param(self):
return [(None, None)]
# leakyReLU
class leakyReLU(Module):
def __init__(self) -> None:
super().__init__()
@@ -143,10 +145,15 @@ class leakyReLU(Module):
def forward(self, input, a=0.1):
self.x = input
# if x>=0 then x
# if x<0 then a*x
self.x[self.x < 0] = self.x[self.x < 0] * a
return self.x
def backward(self, gradwrtoutput, a=0.1):
# gradient of leaky ReLU (not differentiable at 0)
# if x>0 then 1
# if x<0 then a
self.x[self.x > 0] = 1
self.x[self.x < 0] = a
return self.x * gradwrtoutput
@@ -154,7 +161,25 @@ class leakyReLU(Module):
def param(self):
return [(None, None)]
# Tanh
class Tanh(Module):
def __init__(self) -> None:
super().__init__()
self.x = None
def forward(self, input):
self.x = input
return torch.tanh(self.x)
# return (self.x.exp() - self.x.mul(-1).exp()) / (self.x.exp() + self.x.mul(-1).exp())
def backward(self, gradwrtoutput):
# gradient of tanh = ((1 - tanh^2(x)) * gradwrtoutput)
return (1 - self.forward(self.x).pow(2)) * gradwrtoutput
def param(self):
return [(None, None)]
# Sigmoid
class Sigmoid(Module):
def __init__(self) -> None:
super().__init__()
@@ -162,47 +187,50 @@ class Sigmoid(Module):
def forward(self, input):
self.x = input
# sigmoid = 1 / (1 + e^(-x)); cache the output for backward
self.x = 1 / (1 + torch.exp(-self.x))
return self.x
def backward(self, gradwrtoutput):
# gradient of sigmoid = sigmoid(x) * (1 - sigmoid(x)); self.x already holds sigmoid(x)
return self.x * (1 - self.x) * gradwrtoutput
def param(self):
return [(None, None)]
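# --- Gradient sanity-check sketch (illustrative) ---
# The backward methods above implement the analytic derivatives written in the
# comments; this compares Tanh.backward against a central finite difference on
# a toy tensor. eps is an assumption, and float32 finite differences are only
# accurate to a few decimals.
def _tanh_gradient_check(eps=1e-3):
    x = torch.empty(5, 3).normal_()
    act = Tanh()
    act.forward(x)                                  # caches the input in act.x
    analytic = act.backward(torch.ones(x.size()))   # (1 - tanh(x)^2) * 1
    numeric = (torch.tanh(x + eps) - torch.tanh(x - eps)) / (2 * eps)
    return (analytic - numeric).abs().max()         # expected to be close to 0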
## Loss functions
# MSE
class MSE(Module):
def __init__(self) -> None:
super().__init__()
def forward(self, y, t): # y = output, t = target
# MSE = sum((y - t)^2)
return torch.pow(torch.sub(y, t), 2).sum()
def backward(self, y, t):
# gradient of MSE = mean((2 * (y - t)))
return (2 * (y - t)).mean()
def param(self):
return [(None, None)]
# Binary Cross Entropy
class BCE(Module):
def __init__(self) -> None:
super().__init__()
self.y = None
self.t = None
def forward(self, y, t): # y = output, t = target
# BCE = - sum(y * log(t) + (1 - y) * log(1 - t))
# clamp the logs to avoid -inf values (PyTorch's BCE does the same, but clamps at -100 instead of -1)
return -1 * torch.sum(y*torch.clamp(torch.log(t.float()), min=-1) + (1-y)*torch.clamp(torch.log(1-t.float()), min=-1))
def backward(self, y, t):
# gradient of BCE = mean(y - t)
return (y - t).mean()
def param(self):
return [(None, None)]
""" def forward(self, inp, target):
shift = inp - torch.max(inp, 1)[0].view(-1, 1)
num = torch.exp(shift)
self.pred = num / torch.sum(num, 1, keepdim=True)
"""