broadcasting gradient

parent d8198c522b
commit e7e9241d23

layer.py (62 changed lines)
@@ -6,6 +6,31 @@ import io
 #only scalar gradient
 #op must be tree. To handle a general graph we would have to topologically sort it before traversal, so that is not done here.
 
+def broadcasting_be(a,b):
+    i = len(a)-1
+    j = len(b)-1
+    abroad = []
+    bbroad = []
+    while i >= 0 and j >= 0:
+        if a[i] == b[j]:
+            abroad.insert(0,1)
+            bbroad.insert(0,1)
+        elif a[i] == 1 or b[j] == 1:
+            abroad.insert(0,b[j])
+            bbroad.insert(0,a[i])
+        else:
+            raise ValueError
+        i -= 1
+        j -= 1
+    while i >= 0:
+        bbroad.insert(0,a[i])
+        i -= 1
+    while j >= 0:
+        abroad.insert(0,b[j])
+        j -= 1
+    return abroad, bbroad
+
+
 class NonExistVarableError(ValueError):
     pass
 
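For reference, a minimal sketch (not part of the commit; it assumes the shapes are passed as tuples, the way AddOp.backprop does via .numpy().shape) of what broadcasting_be reports:

    from layer import broadcasting_be

    # a of shape (2, 1) gets stretched along its last axis to match b of shape (2, 3)
    abroad, bbroad = broadcasting_be((2, 1), (2, 3))
    print(abroad, bbroad)   # [1, 3] [1, 1]
    # abroad != 1 on axis 1, so the gradient flowing to a must be summed over axis 1;
    # bbroad is all 1s, so b's gradient can pass through unchanged.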
@@ -91,13 +116,15 @@ class AddOp(OpTree):
         return self.v
 
     def backprop(self,seed):
-        #borad_casted = self.a.shape != self.b.shape
-        #np.ones((1,b.shape[1]))
         #a + b
+        ashape, bshape = broadcasting_be(self.a.numpy().shape,self.b.numpy().shape)
+        aai = np.where(np.array(ashape) != 1)
+        bbi = np.where(np.array(bshape) != 1)
         if isinstance(self.a,OpTree):
-            self.a.backprop(seed)
+            self.a.backprop(np.sum(seed,axis=tuple(aai[0])))
         if isinstance(self.b,OpTree):
-            self.b.backprop(seed)
+            self.b.backprop(np.sum(seed,axis=tuple(bbi[0])))
 
 
 def addmul(a,b):
     return AddOp(a,b)
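A small numpy sketch (hypothetical shapes, not from the commit) of the reduction the new backprop performs on the incoming seed:

    import numpy as np
    from layer import broadcasting_be

    a = np.zeros((2, 1))            # will be broadcast along axis 1 in a + b
    b = np.zeros((2, 3))
    seed = np.ones((2, 3))          # upstream gradient has the broadcast result shape
    ashape, bshape = broadcasting_be(a.shape, b.shape)   # [1, 3], [1, 1]
    aai = np.where(np.array(ashape) != 1)                # (array([1]),)
    grad_a = np.sum(seed, axis=tuple(aai[0]))            # shape (2,): one entry per row of a
    bbi = np.where(np.array(bshape) != 1)                # (array([], dtype=int64),)
    grad_b = np.sum(seed, axis=tuple(bbi[0]))            # axis=() performs no reduction
    # note: np.sum without keepdims drops the size-1 axis, so grad_a is (2,) rather than (2, 1)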
@@ -163,30 +190,3 @@ class Variable(OpTree):
         writer.write(f'{id(self)}["Variable{self.x.shape}"]\n')
     def backprop(self,seed):
         self.grad = seed
-
-
-"""
-input_var = Variable(np.array([[1],[2],[3]]))
-weight = Variable(np.array([[2,-1,1]]))
-v = relu(weight @ input_var)
-print(f"result : {v.numpy()}")
-v.backprop(np.ones(()))
-print(f"grad input : {input_var.grad}, w : {weight.grad}")
-"""
-
-#input_diff = Variable(np.array([[1.01],[2],[3]]))
-#v_diff = relu(weight @ input_diff)
-#print(f"diff 1 : {(np.sum(v_diff.numpy()) - v.numpy()) / 0.01}")
-
-#i -= grad * delta
-
-"""
-graph TD
-2284612545696["Variable(1, 3)"]
-2284612545696-->2284612624880[MatmulOp]
-2284612544496["Variable(3, 2)"]
-2284612544496-->2284612624880[MatmulOp]
-2284612624880-->2284612625072[FunctionReluOp]
-2284612625072-->2284612627856[MatmulOp]
-2284612627856-->Result
-"""
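The removed comments sketched a finite-difference check; a hedged standalone version (assuming layer.py's Variable, relu, @ and backprop behave as in the removed demo) could look like:

    from layer import *
    import numpy as np

    eps = 0.01
    x = Variable(np.array([[1.0], [2.0], [3.0]]))
    w = Variable(np.array([[2.0, -1.0, 1.0]]))
    v = relu(w @ x)
    v.backprop(np.ones(()))                                 # fills x.grad and w.grad

    x2 = Variable(np.array([[1.0 + eps], [2.0], [3.0]]))    # perturb the first input entry
    v2 = relu(w @ x2)
    numeric = (np.sum(v2.numpy()) - np.sum(v.numpy())) / eps
    print(numeric, x.grad)                                  # numeric estimate vs. the first entry of x.grad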
p2.py (22 changed lines)
@@ -1,7 +1,7 @@
 from layer import *
 import numpy as np
 import pickle
-
+"""
 DIMENTION = 3
 VAR_RANGE = 1
 N = 10
@@ -18,15 +18,17 @@ y = or_weight @ input_x + or_bias
 error = gen.normal(0,SIGMA,size = (1,N))
 y += error
 print(y)
-
+"""
 
-input_var = Variable(np.array([[1],[2],[3]]))
-weight = Variable(np.array([[2,-1,1]]))
-bias = Variable(np.array([[1]]))
-v = ((weight @ input_var) + bias)
+input_var = Variable(np.array([[1,2,3],[1,5,0]]))
+weight = Variable(np.array([[2],[-1],[1]]))
+bias = Variable(np.array([1]))
+v = relu((input_var @ weight) + bias)
+#print(v.numpy())
+#print(v.numpy().shape, np.array([[1,1]]).shape)
+k = matmul(np.array([[1,1]]), v)
+print(make_mermaid_graph(k))
 
-print(make_mermaid_graph(v))
-
-print(f"result : {v.numpy()}")
-v.backprop(np.ones(()))
+print(f"result : {k.numpy()}")
+k.backprop(np.ones(()))
 print(f"grad input : {input_var.grad}, w : {weight.grad}, b : {bias.grad}")
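As a reading aid (a sketch outside the commit, assuming layer.py's broadcasting matches plain numpy), the shapes in the new driver are:

    # input_var @ weight : (2, 3) @ (3, 1) -> (2, 1)
    # (2, 1) + bias (1,) : bias is broadcast across the batch axis, so
    #                      AddOp.backprop must sum the bias gradient over axis 0
    from layer import broadcasting_be
    print(broadcasting_be((2, 1), (1,)))   # ([1], [2, 1]) -> reduce the bias seed over axis 0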