時系列分析 - データ準備編(BTC価格予測)
何をしているか
- 時系列予測の題材として、BTCの価格予測をする
- それに際して、ひとまずどこかからデータを引っ張ってくる必要がある
- ひとまず、binanceのデータを活用して、データの調達をした
作業詳細
データ取得
仮想通貨取引所の大手であるbinanceが、価格データをAPIで公開してくれている。 www.binance.com
ここから、データを引っ張ってくる。
# Fetch 15-minute BTC/USDT candlesticks (klines) from Binance and load them
# into a DataFrame with a human-readable close timestamp.
# --
import time
import datetime
from datetime import datetime as dt
# --
import hmac
import hashlib
import requests
import json

import pandas as pd
from binance.client import Client

# No API key is passed: the client is created with its defaults.
client = Client()
klines_BTC = client.get_historical_klines(
    "BTCUSDT",
    Client.KLINE_INTERVAL_15MINUTE,
    '15 June, 2022',
    '10 Jan 2023',
)

# One label per field of a kline row, in the order the rows are laid out.
taisho_cols = [
    'Time',
    'Open',
    'High',
    'Low',
    'Close',
    'Volume',
    'Close_Time',
    'Quote_Volume',
    'Number_of_Trades',
    'Taker_buy_base_asset_volume',
    'Taker_buy_quote_asset_volume',
    'Ignore',
]

# Pass the flat list of labels. Wrapping it in another list (`[taisho_cols]`)
# makes pandas build a MultiIndex, so every label becomes a tuple such as
# ('Time',) instead of the plain string 'Time'.
df_BTC = pd.DataFrame(klines_BTC, columns=taisho_cols)

# NOTE(review): Binance's kline endpoint is documented to return prices and
# volumes as strings; cast them so later arithmetic (differences, rolling
# statistics) works on numbers. Confirm against the API reference.
numeric_cols = [
    'Open',
    'High',
    'Low',
    'Close',
    'Volume',
    'Quote_Volume',
    'Taker_buy_base_asset_volume',
    'Taker_buy_quote_asset_volume',
]
df_BTC[numeric_cols] = df_BTC[numeric_cols].astype(float)


def f_maxmin(x):
    """Convert an epoch value in milliseconds to a 'YYYYmmddHHMMSS' string.

    The conversion uses the machine's local timezone, because
    ``datetime.fromtimestamp`` is called without a ``tz`` argument.
    """
    return dt.fromtimestamp(float(x) / 1000).strftime('%Y%m%d%H%M%S')


df_BTC["timestamp"] = df_BTC["Close_Time"].apply(f_maxmin)
ひとまず、いろいろ抽出できてるっぽい
('Time',) | ('Open',) | ('High',) | ('Low',) | ('Close',) | ('Volume',) | ('Close_Time',) | ('Quote_Volume',) | ('Number_of_Trades',) | ('Taker_buy_base_asset_volume',) | ('Taker_buy_quote_asset_volume',) | ('Ignore',) | ('timestamp',) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1655251200000 | 22136.4 | 22153.2 | 21882 | 21934 | 1424.74 | 1655252099999 | 3.13436e+07 | 21181 | 744.015 | 1.63642e+07 | 0 | 20220615091459 |
1 | 1655252100000 | 21935.4 | 22100 | 21904.3 | 21981.7 | 755.669 | 1655252999999 | 1.66372e+07 | 13551 | 377.503 | 8.31082e+06 | 0 | 20220615092959 |
2 | 1655253000000 | 21981.7 | 22147 | 21942.3 | 22047.9 | 523.564 | 1655253899999 | 1.15532e+07 | 11850 | 261.831 | 5.77717e+06 | 0 | 20220615094459 |
3 | 1655253900000 | 22047.9 | 22091.5 | 21741.6 | 21808.3 | 1217.73 | 1655254799999 | 2.66141e+07 | 17946 | 574.265 | 1.25462e+07 | 0 | 20220615095959 |
4 | 1655254800000 | 21808.3 | 21968.9 | 21770.7 | 21951 | 770.302 | 1655255699999 | 1.68429e+07 | 13102 | 422.603 | 9.2407e+06 | 0 | 20220615101459 |
データ加工
ネットの記事を参考に、過去実績系の特徴量・指標系特徴量をつくる。
# NOTE(review): make_ta_feature() uses SMAIndicator, BollingerBands,
# PSARIndicator, macd and rsi, none of which is imported in the visible code.
# They look like the `ta` package's indicators (ta.trend / ta.volatility /
# ta.momentum) -- confirm and import them before calling it.

# Look-back windows, in rows, shared by the lag and rolling-std features.
_LAG_WINDOWS = (2, 4, 12, 48, 96, 144, 288)

# Derived price-difference columns: new column name -> (minuend, subtrahend).
_PRICE_DIFFS = {
    "op_cl": ("Open", "Close"),
    "op_hi": ("Open", "High"),
    "op_lo": ("Open", "Low"),
    "hi_cl": ("High", "Close"),
    "hi_lo": ("High", "Low"),
    "lo_cl": ("Low", "Close"),
}


def make_lag_feature(x, col_name, sbn_flg=True):
    """Add lag, absolute-lag and rolling-std features for one column.

    For every window ``n`` in ``_LAG_WINDOWS`` three columns are written:

    * ``lag{n}_{col_name}``     -- the value ``n`` rows earlier.
    * ``abs_lag{n}_{col_name}`` -- ``|current - lagged|`` when ``sbn_flg`` is
      true, otherwise ``|lagged|``.
    * ``std{n}_{col_name}``     -- rolling standard deviation over ``n`` rows.

    Args:
        x: DataFrame holding ``col_name``; it is modified in place.
        col_name: Name of the numeric column to derive features from.
        sbn_flg: Selects which absolute value is stored (see above).

    Returns:
        The same DataFrame object ``x``, with the new columns added.
    """
    source = x[col_name]
    for window in _LAG_WINDOWS:
        lag_col = f"lag{window}_{col_name}"
        lagged = source.shift(window)
        x[lag_col] = lagged
        if sbn_flg:
            x[f"abs_{lag_col}"] = (source - lagged).abs()
        else:
            x[f"abs_{lag_col}"] = lagged.abs()
        # The rolling std gets its own column. Writing it under the lag
        # column's name would silently replace the lag feature.
        x[f"std{window}_{col_name}"] = source.rolling(window).std()
    return x


def make_feature(x):
    """Add price-difference columns, then lag features for all of them.

    Args:
        x: DataFrame with numeric ``Open``, ``High``, ``Low``, ``Close`` and
            ``Volume`` columns; it is modified in place.

    Returns:
        The same DataFrame object ``x``, with the new columns added.
    """
    base_col = ["Open", "High", "Low", "Close", "Volume"]

    for diff_col, (left, right) in _PRICE_DIFFS.items():
        x[diff_col] = x[left] - x[right]

    # Raw columns store |current - lagged|; the difference columns store the
    # absolute value of the lagged difference itself.
    for col_name in base_col:
        x = make_lag_feature(x, col_name, sbn_flg=True)
    for col_name in _PRICE_DIFFS:
        x = make_lag_feature(x, col_name, sbn_flg=False)
    return x


def make_ta_feature(x):
    """Add technical-indicator columns computed from the price columns.

    Columns written: ``sma7``, ``sma25``, ``sma99``, ``bb_bbm``, ``bb_bbh``,
    ``bb_bbl``, ``psar``, ``MACD`` and ``RSI``.

    Args:
        x: DataFrame with ``High``, ``Low`` and ``Close`` columns; it is
            modified in place.

    Returns:
        The same DataFrame object ``x``, with the new columns added.
    """
    close = x["Close"]

    # Add Simple Moving Average (SMA) indicators
    for window in (7, 25, 99):
        x[f"sma{window}"] = SMAIndicator(
            close=close, window=window, fillna=True
        ).sma_indicator()

    # Add Bollinger Bands indicator
    # (unlike the other indicators, no fillna argument is passed here)
    indicator_bb = BollingerBands(close=close, window=20, window_dev=2)
    x['bb_bbm'] = indicator_bb.bollinger_mavg()
    x['bb_bbh'] = indicator_bb.bollinger_hband()
    x['bb_bbl'] = indicator_bb.bollinger_lband()

    # Add Parabolic Stop and Reverse (Parabolic SAR) indicator
    # NOTE(review): max_step=2 is unusually large next to step=0.02; a cap of
    # 0.2 is the conventional setting -- confirm whether 2 is intended.
    indicator_psar = PSARIndicator(
        high=x["High"],
        low=x["Low"],
        close=close,
        step=0.02,
        max_step=2,
        fillna=True,
    )
    x['psar'] = indicator_psar.psar()

    # Add Moving Average Convergence Divergence (MACD) indicator
    x["MACD"] = macd(close=close, window_slow=26, window_fast=12, fillna=True)

    # Add Relative Strength Index (RSI) indicator
    x["RSI"] = rsi(close=close, window=14, fillna=True)
    return x


def create_features(x):
    """Run the full feature pipeline: lag features, then indicators.

    Args:
        x: DataFrame of candlestick data; it is modified in place.

    Returns:
        The same DataFrame object ``x``, with all feature columns added.
    """
    x = make_feature(x)
    x = make_ta_feature(x)
    return x
次回はいろいろアルゴリズムに入れて、挙動みていく予定