# -*- coding: utf-8 -*-
"""
Spaceship Titanic — CatBoost CPU/GPU ベンチマーク（seed=42, 5fold, soft-vote）
=========================================================================
- 前処理はこれまでの上位再現版と同じ（CryoSleep↔支出、Cabin分解、GroupSize/IsAlone 等）
- CatBoost を CPU と GPU で同一条件（学習率・深さ・ESなど）で実行し、時間を比較
- 公平性のため：
  * 同一 StratifiedKFold 分割（seed=42）
  * 同一の前処理・パラメータ（GPUは rsm 非対応のため未指定）
  * cat_features は「列名」で指定、カテゴリ列は厳密に str 化
- 出力：各モードの fold別 学習/推論時間、CV精度、トータル時間
- さらに提出CSVも保存（念のためスコア比較にも使える）

# 両方（CPU→GPUの順で連続実行）
python benchmark_catboost_cpu_gpu_seed42.py --mode both

# どちらか片方だけ
python benchmark_catboost_cpu_gpu_seed42.py --mode cpu
python benchmark_catboost_cpu_gpu_seed42.py --mode gpu

"""

import os, time, argparse, random, numpy as np, pandas as pd
import catboost
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

# ------------------ CLI arguments ------------------
# --mode selects which back end(s) to benchmark; "both" runs CPU first, then GPU.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--mode",
    default="both",
    choices=["cpu", "gpu", "both"],
    help="実行モード（cpu/gpu/both）",
)
args = parser.parse_args()

# ------------------ Reproducibility ------------------
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
# NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it
# here presumably only matters for child processes — confirm if hash
# determinism of this process is required.
os.environ["PYTHONHASHSEED"] = str(SEED)

print(f"[INFO] CatBoost version: {catboost.__version__}")

# ------------------ I/O ------------------
# Both CSVs are read from the current working directory.
TRAIN_CSV, TEST_CSV = "train.csv", "test.csv"
train, test = (pd.read_csv(csv_path) for csv_path in (TRAIN_CSV, TEST_CSV))

# ------------------ Column definitions ------------------
# Spending amounts (numeric).
SPEND_COLS = ["RoomService", "FoodCourt", "ShoppingMall", "Spa", "VRDeck"]
# True/False columns that are mapped to 1/0.
BOOL_COLS = ["CryoSleep", "VIP"]
# Categorical columns handed to CatBoost (must always be str).
CAT_COLS = ["HomePlanet", "Destination", "Deck", "Side"]
ID_COL, TARGET_COL = "PassengerId", "Transported"

# ------------------ 特徴量エンジニアリング ------------------
def build_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a feature-engineered copy of a raw passenger frame.

    The input is not modified. The following steps are applied to the copy:

    * columns listed in ``BOOL_COLS`` are mapped from True/False to 1/0;
    * spending of passengers with ``CryoSleep == 1`` is forced to 0, and a
      missing ``CryoSleep`` is imputed from the total spend (no spend -> 1,
      any spend -> 0);
    * ``Cabin`` is split on ``/`` into ``Deck`` (first part) and ``Side``
      (third part), with ``"Unknown"`` for missing parts;
    * ``PassengerId`` is split on ``_`` into ``Group`` and ``GroupOrder``;
    * missing ``Age``/spending values and missing ``VIP`` become 0;
    * columns listed in ``CAT_COLS`` are converted to plain ``str``;
    * ``TotalSpend``, ``GroupSize``, ``IsAlone``, ``HasSpend`` and
      ``SpendPerAge`` are derived.

    ``GroupSize`` counts the rows of ``df`` sharing the same ``Group``, so
    pass train and test concatenated if the count should span both.

    Args:
        df: Raw data containing at least ``PassengerId``, ``Cabin``, ``Age``,
            ``CryoSleep``, ``VIP`` and the ``SPEND_COLS``/``CAT_COLS`` source
            columns.

    Returns:
        A new DataFrame with the original columns plus the derived features.
    """
    out = df.copy()

    # Booleans -> 1/0; missing values stay NaN for the imputation below.
    for col in BOOL_COLS:
        if col in out.columns:
            out[col] = out[col].map({True: 1, False: 0})

    # Preliminary total spend. NaN amounts are skipped, so this is never NaN.
    out["TotalSpend"] = out[SPEND_COLS].sum(axis=1, skipna=True)

    # Couple CryoSleep and spending: sleepers spend nothing, and an unknown
    # CryoSleep flag is inferred from whether anything was spent.
    out.loc[out["CryoSleep"] == 1, SPEND_COLS] = 0
    cryo_na = out["CryoSleep"].isna()
    no_spend = out["TotalSpend"] == 0
    out.loc[cryo_na & no_spend, "CryoSleep"] = 1
    out.loc[cryo_na & ~no_spend, "CryoSleep"] = 0

    # Cabin -> Deck / Side. reindex guarantees columns 0..2 exist even when no
    # row contains a full "deck/num/side" value (e.g. every Cabin is missing).
    cab = out["Cabin"].fillna("Unknown").str.split("/", expand=True)
    cab = cab.reindex(columns=range(3))
    out["Deck"] = cab[0].fillna("Unknown")
    out["Side"] = cab[2].fillna("Unknown")

    # PassengerId -> Group / GroupOrder.
    pid = out[ID_COL].str.split("_", expand=True)
    out["Group"] = pid[0]           # high cardinality -> not used for training
    out["GroupOrder"] = pid[1].astype(int)

    # Missing / unparsable numeric values become 0.
    for col in ["Age"] + SPEND_COLS + ["TotalSpend"]:
        out[col] = pd.to_numeric(out[col], errors="coerce").fillna(0)

    # Missing VIP is treated as "not VIP".
    out["VIP"] = out["VIP"].fillna(0)

    # Categorical columns must be plain str (even if they hold 0.0 / NaN).
    for col in CAT_COLS:
        out[col] = out[col].astype("string").fillna("Unknown").astype(str)

    # Recompute the total now that the CryoSleep correction has been applied.
    out["TotalSpend"] = out[SPEND_COLS].sum(axis=1, skipna=True)

    # Group size features, computed over the rows of this frame.
    out["GroupSize"] = out.groupby("Group")[ID_COL].transform("count")
    out["IsAlone"] = (out["GroupSize"] == 1).astype(int)

    # Convenience features.
    out["HasSpend"] = (out["TotalSpend"] > 0).astype(int)
    out["SpendPerAge"] = out["TotalSpend"] / (out["Age"] + 1.0)

    return out

# ------------------ Combine train/test -> GroupSize ------------------
# Tag every row with its origin so the combined frame can be split again.
train["_is_train"] = 1
test["_is_train"] = 0
df_all = build_features(pd.concat([train, test], ignore_index=True))

# Group statistics are counted over train and test together.
df_all["GroupSize"] = df_all.groupby("Group")[ID_COL].transform("count")
df_all["IsAlone"] = df_all["GroupSize"].eq(1).astype(int)

# Drop columns that are not used as features (silently skipping absent ones).
df_all = df_all.drop(columns=["Name", "Cabin", "Group", ID_COL], errors="ignore")

# Split back into train / test.
train_df = df_all.loc[df_all["_is_train"].eq(1)].copy()
test_df = df_all.loc[df_all["_is_train"].eq(0)].copy()
y = train_df[TARGET_COL].map({True: 1, False: 0}).astype(int)

train_df = train_df.drop(columns=[TARGET_COL, "_is_train"])
test_df = test_df.drop(columns=["_is_train"])

# Belt and braces: make sure the categorical columns really are str.
for frame in (train_df, test_df):
    for c in CAT_COLS:
        if c in frame.columns:
            frame[c] = frame[c].astype("string").astype(str)

# Give the test frame exactly the same columns, in the same order, as train.
test_df = test_df.reindex(train_df.columns, axis="columns", fill_value=np.nan)

# cat_features are passed to CatBoost by column name.
cat_feat_names = [name for name in CAT_COLS if name in train_df.columns]

# ------------------ 5-fold split (shared by both modes) ------------------
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
# Materialised once so CPU and GPU runs use the very same folds.
fold_indices = list(skf.split(train_df, y))

# ------------------ ベンチ関数 ------------------
def run_catboost(mode: str) -> dict:
    """Train, time and evaluate CatBoost over the shared folds on one device.

    For every entry of the module-level ``fold_indices`` a fresh
    ``CatBoostClassifier`` is fitted with early stopping on the validation
    part, validation accuracy is recorded, and the test-set probabilities are
    averaged over the folds. Fit and prediction durations are measured
    separately with ``time.perf_counter``. A submission CSV named
    ``submission_catboost_<mode>_cv_seed42.csv`` is written from the averaged
    probabilities (threshold 0.5).

    Args:
        mode: ``"GPU"`` (case-insensitive) trains with ``task_type="GPU"``;
            any other value trains with CatBoost's default task type. The
            string is also used verbatim in log lines and, lower-cased, in
            the output file name.

    Returns:
        Dict with the float entries ``cv_mean``, ``cv_std``, ``fit_mean``,
        ``fit_std``, ``pred_mean``, ``pred_std`` (per-fold statistics) and
        ``total`` (wall-clock seconds for the whole run).
    """
    use_gpu = (mode.upper() == "GPU")
    print(f"\n========== [{mode}] 実行開始 ==========")

    # Hyper-parameters are identical for every fold, so build them once.
    params = dict(
        loss_function="Logloss",
        eval_metric="Logloss",
        learning_rate=0.06,
        depth=6,
        l2_leaf_reg=3.0,
        iterations=4000,      # deliberately large; early stopping cuts it short
        random_seed=SEED,
        boosting_type="Plain",
        bootstrap_type="Bernoulli",
        subsample=0.9,
        verbose=False,
        allow_writing_files=False
    )
    if use_gpu:
        params.update(task_type="GPU", devices="0")  # e.g. "0,1" for several GPUs

    test_proba = np.zeros(len(test_df))

    fit_times = []
    pred_times = []
    scores = []
    best_iters = []

    t0 = time.perf_counter()

    # The test pool does not depend on the fold; build it a single time.
    pool_te = Pool(test_df, cat_features=cat_feat_names)

    for fold, (tr_idx, va_idx) in enumerate(fold_indices, 1):
        X_tr, X_va = train_df.iloc[tr_idx], train_df.iloc[va_idx]
        y_tr, y_va = y.iloc[tr_idx], y.iloc[va_idx]

        pool_tr = Pool(X_tr, y_tr, cat_features=cat_feat_names)
        pool_va = Pool(X_va, y_va, cat_features=cat_feat_names)

        model = CatBoostClassifier(**params)

        # Time only the fit call itself.
        t_fit0 = time.perf_counter()
        model.fit(pool_tr, eval_set=pool_va, use_best_model=True, early_stopping_rounds=200)
        t_fit1 = time.perf_counter()

        # Prediction time covers both the validation and the test set.
        t_pred0 = time.perf_counter()
        va_p = model.predict_proba(pool_va)[:, 1]
        te_p = model.predict_proba(pool_te)[:, 1]
        t_pred1 = time.perf_counter()

        fit_times.append(t_fit1 - t_fit0)
        pred_times.append(t_pred1 - t_pred0)

        acc = accuracy_score(y_va, (va_p >= 0.5).astype(int))
        scores.append(acc)
        best_iters.append(model.get_best_iteration())
        # Soft vote: average the fold probabilities.
        test_proba += te_p / len(fold_indices)

        print(f"[{mode}][Fold {fold}] Acc={acc:.5f}  best_iter={best_iters[-1]}  "
              f"fit={fit_times[-1]:.2f}s  pred={pred_times[-1]:.2f}s")

    total = time.perf_counter() - t0

    # Compute each statistic once; reused for the log lines and the result.
    result = {
        "cv_mean": float(np.mean(scores)),
        "cv_std":  float(np.std(scores)),
        "fit_mean": float(np.mean(fit_times)),
        "fit_std":  float(np.std(fit_times)),
        "pred_mean": float(np.mean(pred_times)),
        "pred_std":  float(np.std(pred_times)),
        "total":    float(total)
    }

    print(f"\n--- [{mode}] 結果 ---")
    print(f"CV Acc mean±std: {result['cv_mean']:.5f} ± {result['cv_std']:.5f}")
    print(f"Fit Time  mean±std: {result['fit_mean']:.2f}s ± {result['fit_std']:.2f}s")
    print(f"Pred Time mean±std: {result['pred_mean']:.2f}s ± {result['pred_std']:.2f}s")
    print(f"Total wall-clock : {total:.2f}s")

    # Build the submission file from the averaged probabilities.
    test_pred = (test_proba >= 0.5).astype(int)
    out_csv = f"submission_catboost_{mode.lower()}_cv_seed42.csv"
    pd.DataFrame({
        "PassengerId": test["PassengerId"],
        "Transported": pd.Series(test_pred).map({1: True, 0: False})
    }).to_csv(out_csv, index=False)
    print(f"Saved: {out_csv}")

    return result

# ------------------ Run ------------------
# Collect one result dict per executed back end, keyed by "CPU" / "GPU".
summary = {}
if args.mode in ("cpu", "both"):
    summary["CPU"] = run_catboost("CPU")
if args.mode in ("gpu", "both"):
    summary["GPU"] = run_catboost("GPU")

# ------------------ Side-by-side summary ------------------
# Only meaningful when both back ends were run.
if len(summary) > 1:
    print("\n========== まとめ（CPU vs GPU） ==========")
    cpu_res, gpu_res = summary["CPU"], summary["GPU"]
    print(f"CV Acc mean   : CPU {cpu_res['cv_mean']:.5f} | GPU {gpu_res['cv_mean']:.5f}")
    print(f"Fit mean time : CPU {cpu_res['fit_mean']:.2f}s | GPU {gpu_res['fit_mean']:.2f}s")
    print(f"Pred mean time: CPU {cpu_res['pred_mean']:.2f}s | GPU {gpu_res['pred_mean']:.2f}s")
    print(f"Total time    : CPU {cpu_res['total']:.2f}s | GPU {gpu_res['total']:.2f}s")
