# sgd_hw/p4.py
from sklearn import datasets
import numpy as np
2021-02-13 23:26:40 +09:00
from layer import *
import os
import pickle
import matplotlib
import matplotlib.pyplot as plt
import random
2021-02-20 15:25:54 +09:00
import itertools
import math
#matplotlib.use("TkAgg")
2021-02-13 23:26:40 +09:00
# Local cache for the MNIST arrays so the download only happens once.
PICKLE_DATA_FILENAME = "mnist.pickle"

if os.path.exists(PICKLE_DATA_FILENAME):
    # Cache hit: X and y were dumped back-to-back, so load them in that order.
    # NOTE: pickle.load executes arbitrary code; only use a cache file that
    # this script itself produced.
    with open(PICKLE_DATA_FILENAME, "rb") as file:
        X = pickle.load(file)
        y = pickle.load(file)
else:
    # Cache miss: fetch from OpenML as plain arrays and store both objects
    # in a single pickle file.
    X, y = datasets.fetch_openml(
        'mnist_784', return_X_y=True, cache=True, as_frame=False
    )
    with open(PICKLE_DATA_FILENAME, "wb") as file:
        pickle.dump(X, file)
        pickle.dump(y, file)

# Index of one random sample; not read by any code visible in this file.
i = random.randint(0, len(X) - 1)

# Simple normalisation: divide every feature by 255.
X = X / 255
# Convert each label to int, then one-hot encode into 10 columns.
y = np.array([int(label) for label in y])
Y = np.eye(10)[y]
2021-02-13 13:20:59 +09:00
2021-02-20 15:25:54 +09:00
# The data is cut into contiguous folds of 3500 samples:
# folds 0-16 -> training, fold 17 -> development, folds 18-19 -> test.
_FOLD_SIZE = 3500
_TRAIN_END = _FOLD_SIZE * 17
_DEV_END = _FOLD_SIZE * 18
_TEST_END = _FOLD_SIZE * 20

train_x, train_y = X[:_TRAIN_END], Y[:_TRAIN_END]
dev_x, dev_y = X[_TRAIN_END:_DEV_END], Y[_TRAIN_END:_DEV_END]
test_x, test_y = X[_DEV_END:_TEST_END], Y[_DEV_END:_TEST_END]

# Unseeded random generator used for mini-batch sampling.
gen: np.random.Generator = np.random.default_rng()

# SGD learning rate.
eta = 0.0001

# Number of samples per mini-batch.
MiniBatchN = 32
2021-02-13 23:26:40 +09:00
2021-02-20 15:25:54 +09:00
class CheckPoint:
    """Record of the model state captured at one evaluation point.

    Attributes are stored exactly as passed in:
        param:     the parameter collection handed over by the caller.
        accuracy:  accuracy value reported for this checkpoint.
        loss:      loss value reported for this checkpoint.
        iteration: training iteration count at capture time.
    """

    def __init__(self, param, accuracy, loss, iteration):
        # The implicit base class is ``object``; no base initialisation needed.
        self.param = param
        self.accuracy = accuracy
        self.loss = loss
        self.iteration = iteration
2021-02-13 23:26:40 +09:00
2021-02-20 15:25:54 +09:00
class Model:
    """Fully connected network with ReLU hidden layers, trained by plain SGD.

    ``Variable``, ``relu`` and ``SoftmaxWithNegativeLogLikelihood`` are not
    defined in this file; presumably they come from ``from layer import *``
    (confirm against the ``layer`` module).

    Attributes:
        layerDim:    output width of each layer, in order.
        param:       list of ``(weight, bias)`` ``Variable`` pairs, one per layer.
        checkpoints: ``CheckPoint`` objects appended by ``set_checkpoint``.
        iteration:   number of completed ``train_one_iterate`` calls.
    """

    def __init__(self, layerDim: list):
        """Create randomly initialised parameters for every layer.

        Args:
            layerDim: output width of each layer; the input width of the first
                layer is fixed at 784.
        """
        super().__init__()
        gen: np.random.Generator = np.random.default_rng()
        self.layerDim = layerDim
        self.param = []
        self.checkpoints = []
        self.iteration = 0
        front = 784
        for back in layerDim:
            # Weights and biases are drawn from a normal(0, 1) distribution.
            weight = Variable(gen.normal(0, 1, size=(front, back)))
            bias = Variable(gen.normal(0, 1, size=(back)))
            self.param.append((weight, bias))
            front = back

    def caculate(self, input_x, y):
        """Run the forward pass and return the loss node.

        Every layer computes ``Z @ w + b``; all layers except the last apply
        ``relu``. The last layer's raw output is passed, together with ``y``,
        to ``SoftmaxWithNegativeLogLikelihood``.

        Args:
            input_x: input batch, wrapped in a ``Variable`` before use.
            y: target labels forwarded unchanged to the loss node.

        Returns:
            The ``SoftmaxWithNegativeLogLikelihood`` node for this batch.
        """
        Z = Variable(input_x)
        last_layer = len(self.param) - 1
        for i, (w, b) in enumerate(self.param):
            U = Z @ w + b
            # No activation on the output layer: the loss node takes raw scores.
            Z = relu(U) if i < last_layer else U
        return SoftmaxWithNegativeLogLikelihood(Z, y)

    def train_one_iterate(self, input_x, y, eta):
        """Perform one SGD step on the given batch.

        Args:
            input_x: input batch.
            y: target labels for the batch.
            eta: learning rate multiplied into each gradient.

        Returns:
            The loss node computed *before* the parameter update.
        """
        # Forward pass.
        J = self.caculate(input_x, y)
        # Backpropagation, seeded with a 0-dimensional array of one.
        J.backprop(np.ones(()))
        for i, (w, b) in enumerate(self.param):
            # Each parameter is replaced by a fresh Variable holding the
            # updated values rather than being modified in place.
            new_w = Variable(w.numpy() - (w.grad) * eta)
            new_b = Variable(b.numpy() - (b.grad) * eta)
            self.param[i] = (new_w, new_b)
        self.iteration += 1
        return J

    def get_loss_and_confusion(self, input_x, y):
        """Return the loss values and the confusion matrix for a batch.

        Predictions are the arg-max class of the softmax output, so every
        sample is counted exactly once. (Rounding the probabilities instead
        would drop every sample whose top probability is not above 0.5.)

        Args:
            input_x: input batch.
            y: target labels; the matrix product below assumes one row per
                sample with one-hot columns.

        Returns:
            ``(J.numpy(), confusion)`` where ``confusion`` is
            ``transpose(y) @ one_hot(predicted class)``.
        """
        J = self.caculate(input_x, y)
        s = J.softmax_numpy()
        predicted = np.eye(s.shape[-1])[np.argmax(s, axis=-1)]
        confusion = (np.transpose(y) @ predicted)
        return J.numpy(), confusion

    def set_checkpoint(self, dev_x, dev_y):
        """Evaluate on the given data, print the result and record a checkpoint.

        Args:
            dev_x: evaluation inputs.
            dev_y: evaluation labels.
        """
        J = self.caculate(dev_x, dev_y)
        loss = np.average(J.numpy())
        print(f"check point #{len(self.checkpoints)}")
        print(self.iteration,'iteration : avg loss : ',loss)
        confusion = get_confusion(J)
        accuracy = get_accuracy_from_confusion(confusion)
        print('accuracy : {:.2f}%'.format(accuracy * 100))
        self.checkpoints.append(CheckPoint(
            # Shallow copy: train_one_iterate overwrites the entries of
            # self.param in place, so storing the list itself would make every
            # checkpoint alias the live parameters. The Variables themselves
            # are replaced on update, so copying the list is sufficient.
            list(self.param),
            accuracy*100,
            loss,
            self.iteration
        ))
def get_confusion(J: "SoftmaxWithNegativeLogLikelihood"):
    """Build a confusion matrix from a loss node.

    Each sample's prediction is the arg-max of its softmax row, turned into a
    one-hot row. The number of classes is taken from the width of the softmax
    output rather than being fixed at 10.

    Args:
        J: object exposing ``softmax_numpy()`` (class probabilities, classes
            on the last axis) and ``y`` (targets; assumed one-hot with one row
            per sample).

    Returns:
        ``transpose(J.y) @ one_hot(prediction)``; with one-hot targets, entry
        ``[t, p]`` counts the samples of true class ``t`` predicted as ``p``.
    """
    s = J.softmax_numpy()
    n_classes = s.shape[-1]
    predicted = np.eye(n_classes)[np.argmax(s, axis=-1)]
    confusion = (np.transpose(J.y) @ predicted)
    return confusion
def get_accuracy_from_confusion(confusion):
    """Return the fraction of the confusion matrix mass lying on its diagonal.

    Args:
        confusion: confusion matrix (array-like accepted by ``np.trace``).

    Returns:
        Sum of the diagonal divided by the sum of all entries.
    """
    diagonal_total = np.sum(np.trace(confusion))
    grand_total = np.sum(confusion)
    return diagonal_total / grand_total
def model_filename(layerDim:[int]):
    """Return the pickle file name used for a model with these layer widths.

    The layer list is embedded in the name using its default string form,
    e.g. ``model[300, 300, 100, 10].pickle``.
    """
    return "model{}.pickle".format(layerDim)
2021-02-13 13:20:59 +09:00
2021-02-20 15:25:54 +09:00
def save_model(model:Model):
    """Pickle ``model`` to the file named after its layer dimensions.

    The target path is ``model_filename(model.layerDim)``; an existing file
    at that path is overwritten.
    """
    target_path = model_filename(model.layerDim)
    with open(target_path, "wb") as sink:
        pickle.dump(model, sink)
def load_or_create_model(layerDim:list):
    """Return the saved model for ``layerDim``, or a new one if none exists.

    Args:
        layerDim: layer widths; selects the pickle file via ``model_filename``
            and is passed to ``Model`` when no file is found.

    Returns:
        The unpickled object from the model file, or a fresh ``Model``.
    """
    model_name = model_filename(layerDim)
    if not os.path.exists(model_name):
        # Nothing saved for this architecture yet: start from scratch.
        return Model(layerDim)
    # NOTE: pickle.load can execute arbitrary code; only load model files
    # written by this script.
    with open(model_name, "rb") as model_file:
        return pickle.load(model_file)
# --- Training -------------------------------------------------------------
# Resume from a saved model with this architecture if one exists.
model = load_or_create_model([300,300,100,10])
# Not read by any code visible in this file; kept as module-level names.
accuracy_list = []
loss_list = []
iteration_list = []
# Mini-batches per epoch, derived from the actual training-set size.
end_n = len(train_x) // MiniBatchN
for epoch in range(1):
    # One epoch: end_n mini-batches, each sampled with replacement.
    for iteration in range(0, end_n):
        # An int population means "sample from arange(n)"; this avoids
        # converting a full range object to an array on every iteration.
        choiced_index = gen.choice(len(train_x), MiniBatchN)
        batch_x = train_x[choiced_index]
        batch_y = train_y[choiced_index]
        model.train_one_iterate(batch_x, batch_y, eta)
        # model.iteration was already incremented by the step above, so this
        # fires after steps 1, 201, 401, ...
        if (model.iteration - 1) % 200 == 0:
            model.set_checkpoint(dev_x, dev_y)
        if model.iteration % 10 == 0:
            print(f"iteration {model.iteration+1}")

# --- Test-set evaluation --------------------------------------------------
J = model.caculate(test_x, test_y)
loss = np.average(J.numpy())
print('testset : avg loss : ',loss)
confusion = get_confusion(J)
accuracy = get_accuracy_from_confusion(confusion)
print('accuracy : {:.2f}%'.format(accuracy * 100))

save_model(model)

# --- Plots ----------------------------------------------------------------
# Accuracy and loss recorded at each checkpoint, side by side.
checkpoint_iterations = [cp.iteration for cp in model.checkpoints]
plt.subplot(1,2,1)
plt.title("accuracy")
plt.plot(checkpoint_iterations,
         [cp.accuracy for cp in model.checkpoints])
plt.subplot(1,2,2)
plt.title("loss")
plt.plot(checkpoint_iterations,
         [cp.loss for cp in model.checkpoints])
plt.show()

# Test-set confusion matrix with the count written into every cell.
plt.title("confusion matrix")
plt.imshow(confusion,cmap='Blues')
plt.colorbar()
for i,j in itertools.product(range(confusion.shape[0]),range(confusion.shape[1])):
    # imshow draws row i at y=i and column j at x=j; diagonal labels are
    # white, all other labels black.
    plt.text(j,i,"{:}".format(confusion[i,j]),horizontalalignment="center",color="white" if i == j else "black")
plt.show()