# Local cache of the raw (X, y) arrays so the dataset is fetched only once.
PICKLE_DATA_FILENAME = "mnist.pickle"

# Splits produced by the most recent load_mnistdata() call; None until then.
train_x = None
train_y = None
dev_x = None
dev_y = None
test_x = None
test_y = None


def _fetch_and_cache_mnist():
    """Fetch 'mnist_784' through scikit-learn and store it in the pickle cache.

    Returns:
        The ``(X, y)`` pair returned by ``datasets.fetch_openml``.
    """
    # Imported here so scikit-learn is only required on a cache miss.
    from sklearn import datasets

    X, y = datasets.fetch_openml('mnist_784', return_X_y=True, cache=True, as_frame=False)

    # Write to a scratch file and rename it into place: an interrupted run
    # must not leave a truncated cache that every later load would choke on.
    scratch_name = PICKLE_DATA_FILENAME + ".tmp"
    with open(scratch_name, "wb") as file:
        pickle.dump(X, file)
        pickle.dump(y, file)
    os.replace(scratch_name, PICKLE_DATA_FILENAME)
    return X, y


def load_mnistdata():
    """Load MNIST, normalise it and split it into train / dev / test sets.

    The raw arrays are read from ``PICKLE_DATA_FILENAME`` when that file
    exists; otherwise they are fetched and the cache is created. Inputs are
    divided by 255 and labels are converted to one-hot rows of width 10.

    The module-level ``train_x`` ... ``test_y`` globals are updated as a side
    effect.

    Returns:
        ``((train_x, train_y), (dev_x, dev_y), (test_x, test_y))`` where the
        splits are the row ranges [0, 59500), [59500, 63000) and
        [63000, 70000) of the loaded arrays (shorter if fewer rows exist).
    """
    global train_x, train_y, dev_x, dev_y, test_x, test_y

    try:
        # NOTE: pickle is only safe because this cache is written locally by
        # this module; never point it at a file from an untrusted source.
        with open(PICKLE_DATA_FILENAME, "rb") as file:
            X = pickle.load(file)
            y = pickle.load(file)
    except FileNotFoundError:
        X, y = _fetch_and_cache_mnist()

    # simple normalize
    X = X / 255

    # Labels are converted element-wise with int() before one-hot encoding.
    y = np.array([int(label) for label in y])
    Y = np.eye(10)[y]

    train_end = 3500 * 17
    dev_end = 3500 * 18
    test_end = 3500 * 20

    train_x, train_y = X[:train_end], Y[:train_end]
    dev_x, dev_y = X[train_end:dev_end], Y[train_end:dev_end]
    test_x, test_y = X[dev_end:test_end], Y[dev_end:test_end]
    return ((train_x, train_y), (dev_x, dev_y), (test_x, test_y))
* #matplotlib.use("TkAgg") -PICKLE_DATA_FILENAME = "mnist.pickle" -if not os.path.exists(PICKLE_DATA_FILENAME): - X, y = datasets.fetch_openml('mnist_784', return_X_y=True, cache=True, as_frame= False) - with open(PICKLE_DATA_FILENAME,"wb") as file: - pickle.dump(X,file) - pickle.dump(y,file) -else: - with open(PICKLE_DATA_FILENAME,"rb") as file: - X = pickle.load(file) - y = pickle.load(file) +train_set, dev_set, test_set = mnist_load.load_mnistdata() -i = random.randint(0,len(X) - 1) -#plt.imshow(X[0].reshape(28,28),cmap='gray',interpolation='none') -#plt.show() - -#simple normalize -X = X / 255 - -y = np.array([int(i) for i in y]) -Y = np.eye(10)[y] - -train_x,train_y = X[0:3500*17], Y[0:3500*17] -dev_x,dev_y = X[3500*17:3500*18], Y[3500*17:3500*18] -test_x,test_y = X[3500*18:3500*20], Y[3500*18:3500*20] +train_x,train_y = train_set +dev_x,dev_y = dev_set +test_x,test_y = test_set gen:np.random.Generator = np.random.default_rng() -eta = 0.0001 +eta = 0.00001 MiniBatchN = 32 -class CheckPoint: - def __init__(self,param,accuracy,loss,iteration): - super().__init__() - self.param = param - self.accuracy = accuracy - self.loss = loss - self.iteration = iteration +model = load_or_create_model([300,10]) -class Model: - def __init__(self, layerDim:[int]): - super().__init__() - gen:np.random.Generator = np.random.default_rng() - self.layerDim = layerDim - self.param = [] - self.checkpoints = [] - self.iteration = 0 - front = 784 - for sd in layerDim: - back = sd - weight = Variable(gen.normal(0,1,size=(front,back))) - bias = Variable(gen.normal(0,1,size=(back))) - self.param.append((weight,bias)) - front = back - - def caculate(self,input_x,y): - input_var = Variable(input_x) - Z = input_var - for i,(w,b) in enumerate(self.param): - U = Z @ w + b - if i < len(self.param) - 1: - Z = relu(U) - else: - Z = U - J = SoftmaxWithNegativeLogLikelihood(Z,y) - return J - def train_one_iterate(self,input_x,y,eta): - #forward pass - J = self.caculate(input_x,y) - #backpropagation 
- J.backprop(np.ones(())) - for i,(w,b) in enumerate(self.param): - w = Variable(w.numpy() - (w.grad) * eta) - b = Variable(b.numpy() - (b.grad) * eta) - self.param[i] = (w,b) - self.iteration += 1 - return J - - def get_loss_and_confusion(self,input_x,y): - J = self.caculate(input_x,y) - s = J.softmax_numpy() - s = np.round(s) - confusion = (np.transpose(y)@s) - return J.numpy(), confusion - - def set_checkpoint(self,dev_x,dev_y): - J = self.caculate(dev_x,dev_y) - loss = np.average(J.numpy()) - print(f"check point #{len(self.checkpoints)}") - print(self.iteration,'iteration : avg loss : ',loss) - - confusion = get_confusion(J) - accuracy = get_accuracy_from_confusion(confusion) - print('accuracy : {:.2f}%'.format(accuracy * 100)) - self.checkpoints.append(CheckPoint( - self.param, - accuracy*100, - loss, - self.iteration - )) - -def get_confusion(J:SoftmaxWithNegativeLogLikelihood): - s = J.softmax_numpy() - s = np.eye(10)[np.argmax(s,axis=len(s.shape)-1)] - confusion = (np.transpose(J.y)@s) - return confusion - -def get_accuracy_from_confusion(confusion): - return np.trace(confusion).sum() / np.sum(confusion) - -def model_filename(layerDim:[int]): - return f"model{layerDim}.pickle" - -def save_model(model:Model): - with open(model_filename(model.layerDim),"wb") as model_file: - pickle.dump(model,model_file) - -def load_or_create_model(layerDim:list): - model_name = model_filename(layerDim) - if os.path.exists(model_name): - with open(model_name,"rb") as model_file: - return pickle.load(model_file) - else: - return Model(layerDim) -model = load_or_create_model([300,300,100,10]) - -accuracy_list = [] -loss_list = [] -iteration_list = [] end_n = math.floor(3500*17 /MiniBatchN) for epoch in range(1): @@ -154,7 +41,7 @@ for epoch in range(1): if (model.iteration) % 10 == 0: print(f"iteration {model.iteration+1}") -J = model.caculate(test_x,test_y) +J = model.caculate(dev_x,dev_y) loss = np.average(J.numpy()) print('testset : avg loss : ',loss) diff --git 
class CheckPoint:
    """Snapshot of a model's parameters and dev-set metrics at one iteration."""

    def __init__(self, param, accuracy, loss, iteration):
        super().__init__()
        self.param = param          # list of (weight, bias) pairs
        self.accuracy = accuracy    # stored by Model.set_checkpoint as a percentage
        self.loss = loss            # average of J.numpy() over the dev set
        self.iteration = iteration  # Model.iteration when the snapshot was taken


class Model:
    """Stack of affine layers over 784-wide inputs, trained by gradient steps.

    Every layer computes ``Z @ w + b``; all layers but the last are followed
    by ``relu``. The last layer's output is fed, together with the labels, to
    ``SoftmaxWithNegativeLogLikelihood``.
    """

    def __init__(self, layerDim: list):
        """Create normally-distributed (mean 0, std 1) weights and biases.

        Args:
            layerDim: output width of each layer, in order.
        """
        super().__init__()
        gen: np.random.Generator = np.random.default_rng()
        self.layerDim = layerDim
        self.param = []
        self.checkpoints = []
        self.iteration = 0
        front = 784
        for back in layerDim:
            weight = Variable(gen.normal(0, 1, size=(front, back)))
            bias = Variable(gen.normal(0, 1, size=back))
            self.param.append((weight, bias))
            front = back

    def caculate(self, input_x, y):
        """Run the forward pass and return the loss node for ``(input_x, y)``."""
        Z = Variable(input_x)
        last = len(self.param) - 1
        for i, (w, b) in enumerate(self.param):
            U = Z @ w + b
            # The final layer stays linear; the loss node applies softmax.
            Z = relu(U) if i < last else U
        return SoftmaxWithNegativeLogLikelihood(Z, y)

    def train_one_iterate(self, input_x, y, eta):
        """Do one gradient-descent step with learning rate ``eta``.

        Returns:
            The loss node computed before the parameters were updated.
        """
        # forward pass
        J = self.caculate(input_x, y)
        # backpropagation
        J.backprop(np.ones(()))
        for i, (w, b) in enumerate(self.param):
            # Parameters are replaced by fresh Variables rather than mutated.
            new_w = Variable(w.numpy() - w.grad * eta)
            new_b = Variable(b.numpy() - b.grad * eta)
            self.param[i] = (new_w, new_b)
        self.iteration += 1
        return J

    def get_loss_and_confusion(self, input_x, y):
        """Return ``(J.numpy(), confusion matrix)`` for the given batch.

        Predictions are taken by argmax, exactly as in ``get_confusion``.
        Rounding the softmax output instead would silently drop every sample
        whose highest probability is not above 0.5.
        """
        J = self.caculate(input_x, y)
        return J.numpy(), get_confusion(J)

    def set_checkpoint(self, dev_x, dev_y):
        """Evaluate on the dev set, print the metrics and record a CheckPoint."""
        J = self.caculate(dev_x, dev_y)
        loss = np.average(J.numpy())
        print(f"check point #{len(self.checkpoints)}")
        print(self.iteration, 'iteration : avg loss : ', loss)

        confusion = get_confusion(J)
        accuracy = get_accuracy_from_confusion(confusion)
        print('accuracy : {:.2f}%'.format(accuracy * 100))
        self.checkpoints.append(CheckPoint(
            # Copy the list: train_one_iterate assigns into self.param in
            # place, so storing the list itself would make every checkpoint
            # show the latest parameters instead of the ones at this point.
            list(self.param),
            accuracy * 100,
            loss,
            self.iteration,
        ))


def get_confusion(J: "SoftmaxWithNegativeLogLikelihood"):
    """Build a confusion matrix from a loss node.

    Each row of ``J.softmax_numpy()`` is turned into a one-hot prediction by
    argmax over the last axis, then multiplied by the transposed labels
    ``J.y``. Assuming ``J.y`` holds one-hot rows, entry ``[i, j]`` counts the
    samples labelled ``i`` that were predicted as ``j``.
    """
    s = J.softmax_numpy()
    # Width follows the softmax output instead of a hard-coded class count.
    predicted = np.eye(s.shape[-1])[np.argmax(s, axis=-1)]
    return np.transpose(J.y) @ predicted


def get_accuracy_from_confusion(confusion):
    """Return the diagonal sum of ``confusion`` divided by the sum of all entries."""
    return np.trace(confusion) / np.sum(confusion)


def model_filename(layerDim: list):
    """Return the pickle file name used for a model with these layer widths."""
    return f"model{layerDim}.pickle"


def save_model(model: Model):
    """Pickle ``model`` to the file named after its ``layerDim``."""
    with open(model_filename(model.layerDim), "wb") as model_file:
        pickle.dump(model, model_file)


def load_or_create_model(layerDim: list):
    """Load the pickled model for ``layerDim`` if present, else build a new one.

    NOTE: unpickling executes code from the file; only load model files that
    were written locally by ``save_model``.
    """
    try:
        with open(model_filename(layerDim), "rb") as model_file:
            return pickle.load(model_file)
    except FileNotFoundError:
        return Model(layerDim)
+print(make_mermaid_graph(J)) +print('testset : avg loss : ',loss) + +confusion = get_confusion(J) +accuracy = get_accuracy_from_confusion(confusion) +print('accuracy : {:.2f}%'.format(accuracy * 100)) + +plt.subplot(1,2,1) +plt.title("accuracy") +plt.plot([*map(lambda x: x.iteration,model.checkpoints)], + [*map(lambda x: x.accuracy,model.checkpoints)] +) +plt.subplot(1,2,2) +plt.title("loss") +plt.plot([*map(lambda x: x.iteration,model.checkpoints)], + [*map(lambda x: x.loss,model.checkpoints)]) +plt.show() + +plt.title("confusion matrix") +plt.imshow(confusion,cmap='Blues') +plt.colorbar() +for i,j in itertools.product(range(confusion.shape[0]),range(confusion.shape[1])): + plt.text(j,i,"{:}".format(confusion[i,j]),horizontalalignment="center",color="white" if i == j else "black") +plt.show() \ No newline at end of file