"""Crawl daily price tables from Naver Finance and store new rows in stock.db."""
import datetime
import multiprocessing as mp
import re
import sqlite3

import bs4
import pandas as pd
import requests


def get_naver_finance_price(code, page=1, timeout=10):
    """Download one page of the Naver Finance daily price listing.

    Args:
        code: Stock code sent as the ``code`` query parameter.
        page: Page number sent as the ``page`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block the crawl
            forever.

    Returns:
        The response body as text.

    Raises:
        UserWarning: The server answered with a status other than 200; the
            status code is the exception argument.
    """
    url = 'https://finance.naver.com/item/sise_day.nhn'
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"}
    response = requests.get(
        url,
        params={'code': code, 'page': page},
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise UserWarning(response.status_code)
    return response.text


# Column names of the scraped table, in the order the cells appear in a row.
stock_h = ['날짜', '종가', '전일비', '시가', '고가', '저가', '거래량']

# Extracts the page number from the href of the "last page" link.
_PAGE_PATTERN = re.compile(r"page=(\d+)")


def get_data(soup, date):
    """Collect the price rows of one parsed page that are newer than ``date``.

    The text of every element matching ``.tah`` is read in document order,
    thousands separators are removed, and the values are grouped into rows of
    ``len(stock_h)`` cells.

    Args:
        soup: Parsed page; only its ``select`` method is used.
        date: ISO 8601 date string used as cutoff. Scanning stops at the first
            row whose date is less than or equal to it. ``None`` disables the
            cutoff.

    Returns:
        ``(frame, reached_cutoff)`` where ``frame`` has the columns of
        ``stock_h`` and ``reached_cutoff`` is True when a row at or before
        ``date`` was encountered.

    Raises:
        ValueError: The number of cells left for the final row does not match
            the number of columns.
    """
    cells = [cell.text.replace(',', '').strip() for cell in soup.select(".tah")]
    width = len(stock_h)
    rows = []
    for start in range(0, len(cells), width):
        row = cells[start:start + width]
        # The site prints dates with dots; dashes make them ISO 8601, so plain
        # string comparison orders them chronologically.
        row[0] = row[0].replace(".", "-")
        if date is not None and row[0] <= date:
            return pd.DataFrame(rows, columns=stock_h), True
        if len(row) != width:
            raise ValueError(
                f"{width} columns expected, incomplete row has {len(row)} cells"
            )
        rows.append(row)
    # Build the frame once instead of concatenating one frame per row.
    return pd.DataFrame(rows, columns=stock_h), False


def get_last_page(soup):
    """Return the number of the last listing page as an int.

    When the page has a ``.pgRR a`` link, the number is taken from the
    ``page=`` parameter of its href. Otherwise the count of ``td a`` links is
    returned.

    Raises:
        ValueError: The link's href carries no ``page=<digits>`` parameter.
    """
    last_link = soup.select_one('.pgRR a')
    if last_link is None:
        return len(soup.select('td a'))
    href = last_link.attrs['href']
    match = _PAGE_PATTERN.search(href)
    if match is None:
        raise ValueError(f"no page number in href: {href!r}")
    return int(match.group(1))


def croll_naver_page(code, page, date):
    """Fetch and parse one listing page; see ``get_data`` for the result."""
    html_text = get_naver_finance_price(code, page)
    soup = bs4.BeautifulSoup(html_text, 'html.parser')
    return get_data(soup, date)


def croll_naver_page_all(code, date) -> pd.DataFrame:
    """Crawl listing pages from page 1 until the cutoff date or the last page.

    Args:
        code: Stock code to crawl.
        date: ISO 8601 cutoff date string (or None for no cutoff); rows at or
            before it are not returned.

    Returns:
        All collected rows in page order with a fresh integer index; an empty
        frame with the ``stock_h`` columns when nothing was collected.
    """
    first_soup = bs4.BeautifulSoup(get_naver_finance_price(code), 'html.parser')
    # Page 1 is already downloaded, so it is always processed even if the
    # pagination links yield a count of 0.
    last = max(1, int(get_last_page(first_soup)))

    frames = []
    for page in range(1, last + 1):
        if page == 1:
            # Reuse the parse above instead of requesting page 1 a second time.
            frame, is_end = get_data(first_soup, date)
        else:
            frame, is_end = croll_naver_page(code, page, date)
        # Empty frames add no rows; skipping them keeps concat input clean.
        if not frame.empty:
            frames.append(frame)
        if is_end:
            break

    if not frames:
        return pd.DataFrame(columns=stock_h)
    return pd.concat(frames, ignore_index=True)


def toSqlPos(x, code):
    """Build the positional parameter tuple for the STOCK insert statement.

    Args:
        x: Row-like object indexable by the ``stock_h`` column names.
        code: Stock code placed first in the tuple.
    """
    return (
        code,
        x["날짜"],
        x["종가"],
        x["전일비"],
        x["시가"],
        x["고가"],
        x["저가"],
        x["거래량"],
    )


def main():
    """Update the STOCK table for every code listed in KRXCorp.

    For each stock whose LastUpdate is not today, rows newer than LastUpdate
    are crawled and inserted, LastUpdate is set to today, and the change is
    committed.
    """
    db = sqlite3.connect("stock.db")
    try:
        today = datetime.date.today()
        krx_stock_rows = db.execute(
            """SELECT Code,LastUpdate From KRXCorp"""
        ).fetchall()
        total = len(krx_stock_rows)
        for i, (code, last_update) in enumerate(krx_stock_rows):
            print(f"{total}/{i}: code {code} : {last_update}")
            if last_update == today.isoformat():
                continue
            d = croll_naver_page_all(code, last_update)
            cursor = db.cursor()
            if len(d) > 0:
                cursor.executemany(
                    "INSERT INTO STOCK (Code,Date,Close,Diff,Open,High,Low,Volume) VALUES (?,?,?,?,?,?,?,?)",
                    [toSqlPos(x, code) for _, x in d.iterrows()],
                )
            cursor.execute(
                """UPDATE KRXCorp Set LastUpdate = ? WHERE Code = ?""",
                (today.isoformat(), code),
            )
            # Commit per stock so finished work survives a later failure.
            db.commit()
    finally:
        # Release the database file even when a request or insert raises.
        db.close()


if __name__ == '__main__':
    main()