This commit is contained in:
monoid 2022-02-23 17:48:39 +09:00
commit 99a88c6da5
4 changed files with 33 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
nsmc/**/*
nsmc.zip

2
README.md Normal file
View File

@ -0,0 +1,2 @@
# nsmc bert

25
download.py Normal file
View File

@ -0,0 +1,25 @@
import requests
import tqdm
import zipfile
import os
def downloadNsmc(filename = "nsmc.zip"):
    """Download the NSMC master-branch archive from GitHub to *filename*.

    The archive is streamed to ``filename + ".part"`` while a tqdm progress
    bar is updated, and is renamed to *filename* only after the whole body
    has been written, so an interrupted download never leaves a truncated
    file under the final name.

    Args:
        filename: Destination path of the zip archive.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
        OSError: If the destination cannot be written.
    """
    URL = "https://github.com/e9t/nsmc/archive/refs/heads/master.zip"
    partial = filename + ".part"
    # Ask for an unencoded body so that Content-Length (when the server sends
    # one) matches the number of bytes yielded by iter_content().
    with requests.get(
        URL,
        stream=True,
        headers={"Accept-Encoding": "identity"},
        timeout=30,
    ) as nsmc:
        nsmc.raise_for_status()
        # Take the size from the final (redirect-followed) GET response.
        # A separate HEAD request does not follow redirects by default, and
        # the header may be absent entirely, so fall back to an open-ended
        # progress bar instead of failing with KeyError.
        length = nsmc.headers.get("Content-Length")
        total = int(length) if length is not None and length.isdigit() else None
        try:
            with open(partial, "wb") as fp, \
                    tqdm.tqdm(total=total, unit='byte', desc=filename) as t:
                for chunk in nsmc.iter_content(chunk_size=8192):
                    fp.write(chunk)
                    t.update(len(chunk))
        except BaseException:
            # Clean up on any failure (including Ctrl-C), then re-raise.
            if os.path.exists(partial):
                os.remove(partial)
            raise
    os.replace(partial, filename)
if __name__ == "__main__":
    # exist_ok=True keeps the script re-runnable: without it the second run
    # fails with FileExistsError because "nsmc" was created by the first run.
    os.makedirs("nsmc", exist_ok=True)
    # Reuse a previously downloaded archive instead of fetching it again.
    if not os.path.exists("nsmc.zip"):
        downloadNsmc()
    # Unpack the archive contents under the "nsmc" directory.
    with zipfile.ZipFile("nsmc.zip") as nsmc:
        nsmc.extractall("nsmc")

4
ndata.py Normal file
View File

@ -0,0 +1,4 @@
import os
# Relative path of the dataset directory.
# NOTE(review): presumably the top-level folder created when download.py
# extracts nsmc.zip into "nsmc" — confirm against the archive layout.
BASE_PATH = "nsmc/nsmc-master"