broadcasting gradient

monoid 2021-02-13 00:26:53 +09:00
parent d8198c522b
commit e7e9241d23
2 changed files with 43 additions and 41 deletions


@@ -6,6 +6,31 @@ import io
 #only scalar gradient
 #op must be tree. To handle a general graph we would have to topologically sort it and traverse in that order, so that is not done here.
+def broadcasting_be(a,b):
+    i = len(a)-1
+    j = len(b)-1
+    abroad = []
+    bbroad = []
+    while i >= 0 and j >= 0:
+        if a[i] == b[j]:
+            abroad.insert(0,1)
+            bbroad.insert(0,1)
+        elif a[i] == 1 or b[j] == 1:
+            abroad.insert(0,b[j])
+            bbroad.insert(0,a[i])
+        else:
+            raise ValueError
+        i -= 1
+        j -= 1
+    while i >= 0:
+        bbroad.insert(0,a[i])
+        i -= 1
+    while j >= 0:
+        abroad.insert(0,b[j])
+        j -= 1
+    return abroad, bbroad
 class NonExistVarableError(ValueError):
     pass
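
Note (not part of the patch): the broadcasting_be helper added above walks two shapes from their trailing dimensions and returns a pair of lists; an entry other than 1 marks an axis along which that operand gets stretched by numpy broadcasting, together with the stretch factor. A minimal usage sketch with illustrative shapes, assuming the patched file is layer.py as the from layer import * line in p2.py suggests:

    # Illustrative only; these shapes are not taken from the commit.
    from layer import broadcasting_be

    print(broadcasting_be((2, 3), (1, 3)))  # ([1, 1], [2, 1]): the second operand is stretched along axis 0
    print(broadcasting_be((2, 1), (1,)))    # ([1], [2, 1]):    the (1,) operand is stretched along a new leading axis
    broadcasting_be((4, 3), (2, 3))         # raises ValueError: the shapes cannot be broadcast together
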
@@ -91,13 +116,15 @@ class AddOp(OpTree):
         return self.v
     def backprop(self,seed):
-        #borad_casted = self.a.shape != self.b.shape
-        #np.ones((1,b.shape[1]))
         #a + b
+        ashape, bshape = broadcasting_be(self.a.numpy().shape,self.b.numpy().shape)
+        aai = np.where(np.array(ashape) != 1)
+        bbi = np.where(np.array(bshape) != 1)
         if isinstance(self.a,OpTree):
-            self.a.backprop(seed)
+            self.a.backprop(np.sum(seed,axis=tuple(aai[0])))
         if isinstance(self.b,OpTree):
-            self.b.backprop(seed)
+            self.b.backprop(np.sum(seed,axis=tuple(bbi[0])))
 def addmul(a,b):
     return AddOp(a,b)
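
Note (not part of the patch): the change to AddOp.backprop above sums the incoming seed over every axis that broadcasting_be flagged, so the gradient reaching a broadcast operand collapses back toward that operand's own shape. A standalone numpy sketch of the same idea, with made-up values:

    import numpy as np

    a = np.arange(6.0).reshape(2, 3)    # shape (2, 3)
    b = np.array([[10., 20., 30.]])     # shape (1, 3); numpy stretches it along axis 0 in a + b
    seed = np.ones((2, 3))              # upstream gradient of a + b

    grad_a = seed                       # a was not stretched, so the seed passes through unchanged
    grad_b = np.sum(seed, axis=(0,))    # collapse the stretched axis: array([2., 2., 2.]), shape (3,)

Since np.sum is called without keepdims, the summed axis is dropped, so grad_b has shape (3,) rather than (1, 3); the patched backprop behaves the same way.
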
@@ -163,30 +190,3 @@ class Variable(OpTree):
         writer.write(f'{id(self)}["Variable{self.x.shape}"]\n')
     def backprop(self,seed):
         self.grad = seed
-"""
-input_var = Variable(np.array([[1],[2],[3]]))
-weight = Variable(np.array([[2,-1,1]]))
-v = relu(weight @ input_var)
-print(f"result : {v.numpy()}")
-v.backprop(np.ones(()))
-print(f"grad input : {input_var.grad}, w : {weight.grad}")
-"""
-#input_diff = Variable(np.array([[1.01],[2],[3]]))
-#v_diff = relu(weight @ input_diff)
-#print(f"diff 1 : {(np.sum(v_diff.numpy()) - v.numpy()) / 0.01}")
-#i -= grad * delta
-"""
-graph TD
-2284612545696["Variable(1, 3)"]
-2284612545696-->2284612624880[MatmulOp]
-2284612544496["Variable(3, 2)"]
-2284612544496-->2284612624880[MatmulOp]
-2284612624880-->2284612625072[FunctionReluOp]
-2284612625072-->2284612627856[MatmulOp]
-2284612627856-->Result
-"""

p2.py

@@ -1,7 +1,7 @@
 from layer import *
 import numpy as np
 import pickle
+"""
 DIMENTION = 3
 VAR_RANGE = 1
 N = 10
@@ -18,15 +18,17 @@ y = or_weight @ input_x + or_bias
 error = gen.normal(0,SIGMA,size = (1,N))
 y += error
 print(y)
-input_var = Variable(np.array([[1],[2],[3]]))
-weight = Variable(np.array([[2,-1,1]]))
-bias = Variable(np.array([[1]]))
-v = ((weight @ input_var) + bias)
-print(make_mermaid_graph(v))
-print(f"result : {v.numpy()}")
-v.backprop(np.ones(()))
+"""
+input_var = Variable(np.array([[1,2,3],[1,5,0]]))
+weight = Variable(np.array([[2],[-1],[1]]))
+bias = Variable(np.array([1]))
+v = relu((input_var @ weight) + bias)
+#print(v.numpy())
+#print(v.numpy().shape, np.array([[1,1]]).shape)
+k = matmul(np.array([[1,1]]), v)
+print(make_mermaid_graph(k))
+print(f"result : {k.numpy()}")
+k.backprop(np.ones(()))
 print(f"grad input : {input_var.grad}, w : {weight.grad}, b : {bias.grad}")