# ============================================
# Digit Recognizer - High Accuracy CNN (5-fold ensemble, with checkpoints)
# Kaggle Notebook: complete single-cell version
# ============================================
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import StratifiedKFold

# ------------------------
# 1. Fix the random seeds for reproducibility
# ------------------------
SEED = 42


def seed_everything(seed=SEED):
    """Seed Python's `random`, NumPy and TensorFlow, and set PYTHONHASHSEED.

    Args:
        seed: Value handed to every seeding call; defaults to SEED.
    """
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up,
    # so this assignment presumably only matters for child processes -- confirm.
    os.environ["PYTHONHASHSEED"] = str(seed)
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)


seed_everything()

# ------------------------
# 2. Load the data (Kaggle input paths)
# ------------------------
train = pd.read_csv("/kaggle/input/digit-recognizer/train.csv")
test = pd.read_csv("/kaggle/input/digit-recognizer/test.csv")

# Every training column whose name starts with "pixel" is used as an input
# feature; the same column list is applied to the test frame.
pixel_cols = [col for col in train.columns if col.startswith("pixel")]


def _to_images(frame):
    """Return the pixel columns of `frame` as float32 arrays for the CNN.

    Values are divided by 255.0 and reshaped to (N, 28, 28, 1).
    """
    scaled = frame[pixel_cols].to_numpy(dtype="float32") / 255.0
    return scaled.reshape(-1, 28, 28, 1)


X = _to_images(train)
X_test = _to_images(test)
y = train["label"].to_numpy(dtype="int32")

NUM_CLASSES = 10

print("X shape:", X.shape)
print("X_test shape:", X_test.shape)

# ------------------------
# 3. Model definition
# ------------------------
def _conv_block(x, filters, dropout_rate):
    """Apply two Conv-BN-ReLU stages, then 2x2 max-pooling and dropout.

    Args:
        x: Input tensor of the block.
        filters: Number of filters used by both 3x3 convolutions.
        dropout_rate: Rate of the Dropout layer that closes the block.

    Returns:
        The output tensor of the block.
    """
    for _ in range(2):
        # use_bias=False: each convolution is immediately followed by
        # BatchNormalization (presumably making a separate bias redundant).
        x = layers.Conv2D(filters, (3, 3), padding="same", use_bias=False)(x)
        x = layers.BatchNormalization()(x)
        x = layers.ReLU()(x)
    x = layers.MaxPooling2D((2, 2))(x)
    return layers.Dropout(dropout_rate)(x)


def build_model(input_shape=(28, 28, 1), num_classes=10):
    """Build and compile the CNN classifier.

    The network stacks three convolutional blocks (32, 64 and 128 filters with
    dropout rates 0.25, 0.30 and 0.40), followed by a 256-unit dense stage
    (BatchNormalization, ReLU, dropout 0.5) and a softmax output layer.

    Args:
        input_shape: Shape of a single input sample.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A `keras.Model` compiled with Adam (learning rate 1e-3), sparse
        categorical cross-entropy loss and the accuracy metric.
    """
    inputs = keras.Input(shape=input_shape)

    x = inputs
    # Blocks 1-3: the filter count grows with the dropout rate.
    for filters, dropout_rate in ((32, 0.25), (64, 0.30), (128, 0.40)):
        x = _conv_block(x, filters, dropout_rate)

    # Dense head
    x = layers.Flatten()(x)
    x = layers.Dense(256, use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.Dropout(0.5)(x)

    outputs = layers.Dense(num_classes, activation="softmax")(x)

    model = keras.Model(inputs, outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        # Sparse variant: labels are passed as integer class ids.
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

# ------------------------
# 4. Data augmentation
# ------------------------
# Random rotation / shift / zoom ranges handed to the image generator.
_AUGMENTATION_RANGES = {
    "rotation_range": 10,
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "zoom_range": 0.1,
}
datagen = keras.preprocessing.image.ImageDataGenerator(**_AUGMENTATION_RANGES)

# ------------------------
# 5. 5-fold stratified CV with per-fold checkpoints
# ------------------------
N_FOLDS = 5
EPOCHS = 40
BATCH_SIZE = 128

skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

# Accumulates the mean of the per-fold class probabilities on the test set.
test_preds = np.zeros((X_test.shape[0], NUM_CLASSES), dtype="float32")

for fold, (train_idx, val_idx) in enumerate(skf.split(X, y), 1):
    print(f"\n===== FOLD {fold}/{N_FOLDS} =====")

    # A fresh model is built for every fold; release the Keras global state
    # left by the previous fold so memory does not keep growing across folds.
    keras.backend.clear_session()

    X_tr, X_val = X[train_idx], X[val_idx]
    y_tr, y_val = y[train_idx], y[val_idx]

    model = build_model()

    # Checkpoint path (with save_weights_only=True the file name has to end
    # in .weights.h5).
    ckpt_path = f"best_model_fold{fold}.weights.h5"

    # Keep only the weights of the epoch with the best validation accuracy.
    ckpt = keras.callbacks.ModelCheckpoint(
        ckpt_path,
        monitor="val_accuracy",
        save_best_only=True,
        save_weights_only=True,
        verbose=1,
    )

    es = keras.callbacks.EarlyStopping(
        monitor="val_accuracy",
        patience=8,
        # False is fine here: the best weights are reloaded from the
        # checkpoint file after training.
        restore_best_weights=False,
        verbose=1,
    )

    # Halve the learning rate when val_loss stops improving for 4 epochs,
    # down to a floor of 1e-5.
    rlrop = keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.5,
        patience=4,
        min_lr=1e-5,
        verbose=1,
    )

    # Only the training split is augmented; validation uses the raw images.
    train_gen = datagen.flow(X_tr, y_tr, batch_size=BATCH_SIZE)
    # Integer division: a trailing partial batch is not counted as a step.
    steps_per_epoch = len(X_tr) // BATCH_SIZE

    model.fit(
        train_gen,
        epochs=EPOCHS,
        steps_per_epoch=steps_per_epoch,
        validation_data=(X_val, y_val),
        callbacks=[ckpt, es, rlrop],
        verbose=2,
    )

    # Explicitly load the weights saved at the best epoch.
    print(f"Loading best weights from {ckpt_path}")
    model.load_weights(ckpt_path)

    # Predict on the test set; each fold contributes 1/N_FOLDS of the average.
    fold_test_pred = model.predict(X_test, batch_size=BATCH_SIZE, verbose=1)
    test_preds += fold_test_pred / N_FOLDS

# ------------------------
# 6. Final prediction from the ensemble and submission file
# ------------------------
# The predicted label is the class with the highest averaged probability.
final_test_labels = test_preds.argmax(axis=1)

# ImageId is 1-based, one id per test row.
image_ids = np.arange(1, len(final_test_labels) + 1)
submission = pd.DataFrame({"ImageId": image_ids, "Label": final_test_labels})

submission.to_csv("submission_cnn_5fold.csv", index=False)
print("Saved submission_cnn_5fold.csv")
