diff --git a/layer.py b/layer.py
index f8f31ce..d892ea0 100644
--- a/layer.py
+++ b/layer.py
@@ -82,11 +82,9 @@ class MatMulOp(OpTree):
         b = self.b.numpy() if isinstance(self.b,OpTree) else self.b
         if isinstance(self.a,OpTree):
             s = seed * np.transpose(b) if seed.shape == () else (seed) @ np.transpose(b)
-            #print('seed : ', s)
             self.a.backprop((s))
         if isinstance(self.b,OpTree):
             s = np.transpose(a) * seed if seed.shape == () else np.transpose(a) @ seed
-            #print('seed : ', s)
             self.b.backprop(s)
 
 def matmul(a,b):
@@ -179,18 +177,20 @@ def relu(v):
     return FunctionOp(relu_f,relu_diff,"Relu",v)
 #row vector
 def softmaxHelp(i):
-    e = np.exp(i)
-    sumofe = np.sum(e,axis=e.ndim - 1)
+    m = np.max(i,axis=i.ndim-1)
+    e = np.exp(i-m.reshape(*m.shape,1))
+    sumofe = np.sum(e,axis=i.ndim - 1)
     sumofe = sumofe.reshape(*sumofe.shape,1)
     return e / sumofe
 class SoftmaxWithNegativeLogLikelihood(OpTree):
     #row vector
     def __init__(self, i, y):
         super().__init__()
+        epsilon = 1e-12
         self.i = i
         self.s = softmaxHelp(i.numpy())
         self.y = y
-        self.v = -y*np.log(self.s)
+        self.v = -y*np.log(self.s+epsilon)
         self.v = np.sum(self.v,axis=self.v.ndim-1)
 
     def __str__(self):