91 lines
3.1 KiB
Python
91 lines
3.1 KiB
Python
import pandas as pd
|
|
import bs4
|
|
import requests
|
|
import re
|
|
import multiprocessing as mp
|
|
import sqlite3
|
|
import datetime
|
|
|
|
def get_naver_finance_price(code,page=1,timeout=10):
    """Download one page of the Naver Finance daily-price listing as HTML text.

    Args:
        code: Stock code, sent as the ``code`` query parameter.
        page: Page number of the listing, sent as the ``page`` query
            parameter. Defaults to the first page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole crawl forever.

    Returns:
        The response body as text.

    Raises:
        UserWarning: If the HTTP status code is not 200; the status code is
            the exception argument.
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    url = 'https://finance.naver.com/item/sise_day.nhn'
    # A browser-like User-Agent is sent with every request
    # (presumably the site rejects the default client UA -- TODO confirm).
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"}
    response = requests.get(
        url,
        params={'code': code, 'page': page},
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise UserWarning(response.status_code)
    return response.text
|
|
|
|
# Column names of one scraped price row, in the order the cells are read.
stock_h = ['날짜','종가','전일비','시가','고가','저가','거래량']


def get_data(soup,date):
    """Extract the price rows that are newer than ``date`` from a parsed page.

    The text of every element matching the ``.tah`` CSS selector is read in
    document order, thousands separators (``,``) are removed, and the values
    are grouped into consecutive rows of ``len(stock_h)`` cells. The first
    cell of each row is treated as the date; its dots are replaced with
    dashes (``2021.01.05`` -> ``2021-01-05``) so it can be compared as an
    ISO 8601 string.

    Args:
        soup: Parsed page. Only ``soup.select(".tah")`` and the ``.text`` of
            each returned element are used.
        date: ISO date string. Extraction stops at the first row whose date
            is less than or equal to it. ``None`` disables the cut-off so
            every row on the page is returned.

    Returns:
        A ``(frame, reached)`` tuple. ``frame`` is a DataFrame with the
        ``stock_h`` columns holding the collected rows as strings;
        ``reached`` is True when a row at or before ``date`` was found
        (no older page needs to be read), otherwise False.

    Raises:
        ValueError: If a row that would be stored has fewer cells than
            there are columns.
    """
    width = len(stock_h)
    cells = [tag.text.replace(',', '').strip() for tag in soup.select(".tah")]

    # Rows are gathered in a plain list and turned into a single DataFrame
    # at the end instead of concatenating one tiny frame per row.
    rows = []
    for start in range(0, len(cells), width):
        row = cells[start:start + width]
        # for ISO 8601
        row[0] = row[0].replace(".", "-")
        # Plain string comparison is enough because both sides are ISO dates.
        if date is not None and row[0] <= date:
            return pd.DataFrame(rows, columns=stock_h), True
        if len(row) != width:
            # Fail loudly rather than let pandas pad a truncated row.
            raise ValueError(
                f"{width} columns expected, incomplete row has {len(row)} cells"
            )
        rows.append(row)
    return pd.DataFrame(rows, columns=stock_h), False
|
|
def get_last_page(soup):
    """Return the number of the last page of the listing as an ``int``.

    The page number is read from the ``page=`` query parameter in the href
    of the link selected by ``.pgRR a``. When that link is missing, or its
    href carries no page number, the count of ``td a`` links is used
    instead.

    Args:
        soup: Parsed first page of the listing. Only ``select_one`` and
            ``select`` are used.

    Returns:
        The last page number, always an ``int`` and never less than 1.
    """
    last_link = soup.select_one('.pgRR a')
    if last_link is not None:
        # ``\d+`` (not ``\d*``) so an empty page number is never captured.
        match = re.search(r"page=(\d+)", last_link.attrs['href'])
        if match is not None:
            return int(match.group(1))
    # The page that was just parsed is page 1, so at least one page exists
    # even when no links are found.
    return max(len(soup.select('td a')), 1)
|
|
|
|
def croll_naver_page(code,page,date):
    """Download one listing page for ``code`` and extract its price rows.

    The page is fetched with ``get_naver_finance_price``, parsed with the
    ``html.parser`` backend, and handed to ``get_data`` together with
    ``date``; the result of ``get_data`` is returned unchanged.
    """
    parsed_page = bs4.BeautifulSoup(
        get_naver_finance_price(code, page),
        'html.parser',
    )
    return get_data(parsed_page, date)
|
|
|
|
def croll_naver_page_all(code,date) -> pd.DataFrame:
    """Collect the price rows of ``code`` across listing pages.

    Pages are read in ascending page order, starting at page 1, until
    ``get_data`` reports that the cut-off ``date`` was reached or the last
    page has been processed. The first page is downloaded only once: the
    same parsed document is used to find the last page number and to
    extract the first rows.

    Args:
        code: Stock code to crawl.
        date: Cut-off date passed through to ``get_data``.

    Returns:
        A DataFrame with the rows of all visited pages concatenated under a
        fresh index. It is an empty DataFrame when no page was visited.
    """
    first_page = bs4.BeautifulSoup(get_naver_finance_price(code), 'html.parser')
    last_page = int(get_last_page(first_page))

    frames = []
    for page_number in range(1, last_page + 1):
        if page_number == 1:
            # Already downloaded above; do not request it a second time.
            frame, reached_cutoff = get_data(first_page, date)
        else:
            frame, reached_cutoff = croll_naver_page(code, page_number, date)
        frames.append(frame)
        if reached_cutoff:
            break

    if not frames:
        # Keep the return type consistent with the annotation.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
|
|
|
|
def toSqlPos(x,code):
    """Build the positional SQL parameter tuple for one price row.

    Args:
        x: Mapping-like row indexed by the Korean column names.
        code: Stock code, placed first in the tuple.

    Returns:
        ``(code, date, close, diff, open, high, low, volume)`` taken from
        ``x`` in that order.
    """
    ordered_keys = ("날짜", "종가", "전일비", "시가", "고가", "저가", "거래량")
    return (code, *(x[key] for key in ordered_keys))
|
|
|
|
if __name__ == '__main__':
    # Update the STOCK table for every company listed in KRXCorp, reading
    # only the rows newer than each company's LastUpdate value.
    db = sqlite3.connect("stock.db")
    try:
        today_iso = datetime.date.today().isoformat()

        # Read the whole company list up front: the total is needed for the
        # progress output, and the table is updated while looping over it.
        krx_stock_rows = list(enumerate(db.execute("""SELECT Code,LastUpdate From KRXCorp""")))
        total = len(krx_stock_rows)

        for i, (code, last_update) in krx_stock_rows:
            print(f"{total}/{i}: code {code} : {last_update}")
            if last_update == today_iso:
                # Already refreshed today.
                continue

            d = croll_naver_page_all(code, last_update)

            # One transaction per company: the new rows and the LastUpdate
            # marker are committed together, or rolled back together if
            # either statement fails.
            with db:
                if len(d) > 0:
                    db.executemany(
                        "INSERT INTO STOCK (Code,Date,Close,Diff,Open,High,Low,Volume) VALUES (?,?,?,?,?,?,?,?)",
                        [toSqlPos(x, code) for _, x in d.iterrows()],
                    )
                db.execute("""UPDATE KRXCorp Set LastUpdate = ? WHERE Code = ?""", (today_iso, code))
    finally:
        # Release the database file even when a download or query fails.
        db.close()