Compare commits

..

No commits in common. "8a1442995b0c9f120ca907b1009c2bc7ad969e85" and "bc504fce747060a5d4ef4c7ce6f23abdf829238f" have entirely different histories.

4 changed files with 99 additions and 658 deletions

1
.gitignore vendored
View File

@ -3,4 +3,3 @@ nsmc.zip
.ipynb_checkpoints/**/* .ipynb_checkpoints/**/*
__pycache__/**/* __pycache__/**/*
model.zip model.zip
model/**/*

View File

@ -3,7 +3,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 1,
"id": "5a4a1e30", "id": "c916dd3b",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -25,7 +25,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 2,
"id": "710cd5b2", "id": "d5861234",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -39,7 +39,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 3,
"id": "da018ffe", "id": "5accd3a9",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -68,7 +68,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "69f05cf6", "id": "d10fcb83",
"metadata": {}, "metadata": {},
"source": [ "source": [
"data를 준비" "data를 준비"
@ -77,7 +77,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 7,
"id": "961edd10", "id": "552fe555",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -114,7 +114,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "4178b576", "id": "1cff8e03",
"metadata": {}, "metadata": {},
"source": [ "source": [
"간단한 collate function" "간단한 collate function"
@ -123,7 +123,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "a5ff0049", "id": "89eb64d8",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": []

File diff suppressed because one or more lines are too long

View File

@ -7,15 +7,13 @@ from ndata import readNsmcRawData, NsmcRawData
def readNsmcDataAll(): def readNsmcDataAll():
""" """
Returns: train, dev, test Returns: train, test
""" """
print("read train set", file=sys.stderr) print("read train set", file=sys.stderr)
train = readNsmcRawData("nsmc/nsmc-master/ratings_train.txt",use_tqdm=True,total=150_000) train = readNsmcRawData("nsmc/nsmc-master/ratings_train.txt",use_tqdm=True,total=150_000)
print("read test set", file=sys.stderr) print("read test set", file=sys.stderr)
testBig = readNsmcRawData("nsmc/nsmc-master/ratings_test.txt",use_tqdm=True,total=50_000) test = readNsmcRawData("nsmc/nsmc-master/ratings_test.txt",use_tqdm=True,total=50_000)
test = testBig[:30_000] return NsmcDataset(train),NsmcDataset(test)
dev = testBig[30_000:]
return NsmcDataset(train),NsmcDataset(dev),NsmcDataset(test)
class NsmcDataset(Dataset): class NsmcDataset(Dataset):
def __init__(self, data: List[NsmcRawData]): def __init__(self, data: List[NsmcRawData]):