和風ましらに

機械学習とか勉強したことを書き認めるブログ

時系列分析 - データ準備編(BTC価格予測)

何をしているか

  • 時系列予測の題材として、BTCの価格予測をする
  • それに際して、ひとまずどこかからデータを引っ張ってくる必要がある
  • ひとまず、binanceのデータを活用して、データの調達をした

作業詳細

データ取得

仮想通貨取引所の大手であるbinanceが、価格データを公開してくれている。 www.binance.com

ここから、データを引っ張ってくる。

# Standard library
import datetime
import hashlib
import hmac
import json
import time
from datetime import datetime as dt

# Third-party
import pandas as pd
import requests
from binance.client import Client

# Fetch 15-minute BTC/USDT candlesticks for the given date range.
# Client() is created without credentials.
client = Client()
klines_BTC = client.get_historical_klines(
    "BTCUSDT", Client.KLINE_INTERVAL_15MINUTE, '15 June, 2022', '10 Jan 2023'
)

# Column names, in the order of the fields of each returned kline row.
taisho_cols = [
    'Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Close_Time',
    'Quote_Volume', 'Number_of_Trades', 'Taker_buy_base_asset_volume',
    'Taker_buy_quote_asset_volume', 'Ignore',
]
# Pass the flat list: wrapping it in another list makes pandas build a
# one-level MultiIndex, so every column label becomes a tuple like ('Open',).
df_BTC = pd.DataFrame(klines_BTC, columns=taisho_cols)

# The kline endpoint delivers price and volume fields as strings; convert them
# so that later arithmetic (differences, rolling std, indicators) works.
numeric_cols = [
    'Open', 'High', 'Low', 'Close', 'Volume', 'Quote_Volume',
    'Taker_buy_base_asset_volume', 'Taker_buy_quote_asset_volume',
]
df_BTC[numeric_cols] = df_BTC[numeric_cols].astype(float)


def f_maxmin(x):
    """Format a millisecond epoch value as a 'YYYYmmddHHMMSS' string.

    The conversion uses the local time zone of the machine running the script.
    """
    return dt.fromtimestamp(float(x) / 1000).strftime('%Y%m%d%H%M%S')


df_BTC["timestamp"] = df_BTC["Close_Time"].apply(f_maxmin)

ひとまず、いろいろ抽出できてるっぽい

('Time',) ('Open',) ('High',) ('Low',) ('Close',) ('Volume',) ('Close_Time',) ('Quote_Volume',) ('Number_of_Trades',) ('Taker_buy_base_asset_volume',) ('Taker_buy_quote_asset_volume',) ('Ignore',) ('timestamp',)
0 1655251200000 22136.4 22153.2 21882 21934 1424.74 1655252099999 3.13436e+07 21181 744.015 1.63642e+07 0 20220615091459
1 1655252100000 21935.4 22100 21904.3 21981.7 755.669 1655252999999 1.66372e+07 13551 377.503 8.31082e+06 0 20220615092959
2 1655253000000 21981.7 22147 21942.3 22047.9 523.564 1655253899999 1.15532e+07 11850 261.831 5.77717e+06 0 20220615094459
3 1655253900000 22047.9 22091.5 21741.6 21808.3 1217.73 1655254799999 2.66141e+07 17946 574.265 1.25462e+07 0 20220615095959
4 1655254800000 21808.3 21968.9 21770.7 21951 770.302 1655255699999 1.68429e+07 13102 422.603 9.2407e+06 0 20220615101459

データ加工

ネットの記事を参考に、過去実績系の特徴量・指標系特徴量をつくる。

def make_lag_feature(x, col_name, sbn_flg=True, windows=(2, 4, 12, 48, 96, 144, 288)):
    """Add lag, absolute-lag and rolling-std features for one column.

    For every window ``n`` in ``windows`` three columns are written to ``x``:

    * ``lag{n}_{col_name}``     -- ``col_name`` shifted down by ``n`` rows.
    * ``abs_lag{n}_{col_name}`` -- ``|current - lagged|`` when ``sbn_flg`` is
      true, otherwise ``|lagged|``.
    * ``std{n}_{col_name}``     -- rolling standard deviation over ``n`` rows.

    Args:
        x: DataFrame containing ``col_name``. It is modified in place.
        col_name: Name of the source column.
        sbn_flg: If true, the ``abs_`` column holds the absolute difference
            between the current and the lagged value; if false, it holds the
            absolute value of the lagged value itself.
        windows: Row offsets / window lengths to generate features for.

    Returns:
        The same DataFrame ``x`` with the new columns added.
    """
    source = x[col_name]
    # Lag columns and their absolute counterparts, window by window.
    for n in windows:
        lag_col = f"lag{n}_{col_name}"
        lagged = source.shift(n)
        x[lag_col] = lagged
        if sbn_flg:
            x[f"abs_{lag_col}"] = (source - lagged).abs()
        else:
            x[f"abs_{lag_col}"] = lagged.abs()
    # Rolling std goes into its own "std" columns so the lag columns written
    # above are not overwritten.
    for n in windows:
        x[f"std{n}_{col_name}"] = source.rolling(n).std()
    return x

def make_feature(x):
    """Add pairwise price-difference columns, then lag features for all of them.

    Six difference columns are derived from Open/High/Low/Close. Afterwards
    ``make_lag_feature`` is applied to the five raw columns with
    ``sbn_flg=True`` and to the six difference columns with ``sbn_flg=False``.

    Args:
        x: DataFrame with "Open", "High", "Low", "Close" and "Volume" columns.

    Returns:
        The DataFrame returned by the last ``make_lag_feature`` call.
    """
    raw_columns = ["Open", "High", "Low", "Close", "Volume"]
    # New column name -> (minuend, subtrahend); insertion order is the order
    # in which the columns are created.
    differences = {
        "op_cl": ("Open", "Close"),
        "op_hi": ("Open", "High"),
        "op_lo": ("Open", "Low"),
        "hi_cl": ("High", "Close"),
        "hi_lo": ("High", "Low"),
        "lo_cl": ("Low", "Close"),
    }
    for new_col, (left, right) in differences.items():
        x[new_col] = x[left] - x[right]

    for name in raw_columns:
        x = make_lag_feature(x, name, sbn_flg=True)
    for name in differences:
        x = make_lag_feature(x, name, sbn_flg=False)
    return x


def make_ta_feature(x):
    """Add technical-indicator columns computed from the price columns of ``x``.

    Columns written: ``sma7``, ``sma25``, ``sma99``, ``bb_bbm``, ``bb_bbh``,
    ``bb_bbl``, ``psar``, ``MACD`` and ``RSI``.

    NOTE(review): ``SMAIndicator``, ``BollingerBands``, ``PSARIndicator``,
    ``macd`` and ``rsi`` must already be in scope; they look like names from
    the ``ta`` package -- confirm the imports at the top of the file.

    Args:
        x: DataFrame with "High", "Low" and "Close" columns. Modified in place.

    Returns:
        The same DataFrame ``x`` with the indicator columns added.
    """
    close = x["Close"]
    # Simple moving averages over 7, 25 and 99 rows.
    for window in (7, 25, 99):
        x[f"sma{window}"] = SMAIndicator(close=close, window=window, fillna=True).sma_indicator()
    # Bollinger Bands: middle, upper and lower band.
    # NOTE(review): fillna is not passed here, unlike the other indicators;
    # presumably the warm-up rows stay NaN -- confirm this is intended.
    indicator_bb = BollingerBands(close=close, window=20, window_dev=2)
    x['bb_bbm'] = indicator_bb.bollinger_mavg()
    x['bb_bbh'] = indicator_bb.bollinger_hband()
    x['bb_bbl'] = indicator_bb.bollinger_lband()
    # Parabolic SAR. The acceleration factor is capped at 0.2 (the conventional
    # limit for a 0.02 step); the previous value of 2 was ten times that cap.
    indicator_psar = PSARIndicator(
        high=x["High"], low=x["Low"], close=close, step=0.02, max_step=0.2, fillna=True
    )
    x['psar'] = indicator_psar.psar()
    # Moving Average Convergence Divergence (26-row slow, 12-row fast window).
    x["MACD"] = macd(close=close, window_slow=26, window_fast=12, fillna=True)
    # Relative Strength Index over 14 rows.
    x["RSI"] = rsi(close=close, window=14, fillna=True)
    return x

def create_features(x):
    """Run the full feature pipeline on ``x``.

    Applies ``make_feature`` first and passes its result to
    ``make_ta_feature``.

    Args:
        x: Input DataFrame handed to ``make_feature``.

    Returns:
        The value returned by ``make_ta_feature``.
    """
    return make_ta_feature(make_feature(x))

次回は、いろいろなアルゴリズムに入れて挙動をみていく予定