ckiias/lec4-3-nlp-conv.ipynb

29 KiB

In [1]:
import os

# Select the PyTorch backend for Keras 3. This MUST be set before `keras`
# is imported, because the backend is fixed at import time.
os.environ["KERAS_BACKEND"] = "torch"
import keras

# Confirm which Keras version the kernel resolved.
print(keras.__version__)
3.9.2
In [2]:
from keras.api.datasets import imdb
import os

# Hyperparameters for the IMDB sentiment task.
unique_words = 5000  # keep only the 5,000 most frequent tokens
max_length = 400     # every review will be padded/truncated to this length

output_dir = "tmp"
# exist_ok=True makes this idempotent and removes the check-then-create race
# of the previous `if not os.path.exists(...)` guard.
os.makedirs(output_dir, exist_ok=True)

# Reviews arrive as lists of word indices (capped at `unique_words`);
# labels are 0/1 sentiment.
(X_train, y_train), (X_valid, y_valid) = imdb.load_data(num_words=unique_words)
In [3]:
from keras.api.preprocessing.sequence import pad_sequences


def _to_fixed_length(sequences):
    """Left-pad / left-truncate each review to exactly `max_length` token ids (pad value 0)."""
    return pad_sequences(sequences, maxlen=max_length, padding="pre", truncating="pre", value=0)


X_train = _to_fixed_length(X_train)
X_valid = _to_fixed_length(X_valid)
In [4]:
from keras.api.models import Sequential
from keras.api.layers import InputLayer, Embedding, SpatialDropout1D, Conv1D, GlobalMaxPooling1D, Dense, Dropout

# Token ids -> embeddings -> 1-D convolution -> global max pool -> classifier.
conv_model = Sequential(
    [
        InputLayer(shape=(max_length,), dtype="float32"),
        Embedding(unique_words, 64),
        SpatialDropout1D(0.2),
        # convolutional layer: 256 filters over windows of 3 tokens
        Conv1D(256, 3, activation="relu"),
        GlobalMaxPooling1D(),
        # fully connected layer
        Dense(256, activation="relu"),
        Dropout(0.2),
        # output layer: sigmoid probability of positive sentiment
        Dense(1, activation="sigmoid"),
    ]
)

conv_model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                     Output Shape                  Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ embedding (Embedding)           │ (None, 400, 64)        │       320,000 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ spatial_dropout1d               │ (None, 400, 64)        │             0 │
│ (SpatialDropout1D)              │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv1d (Conv1D)                 │ (None, 398, 256)       │        49,408 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ global_max_pooling1d            │ (None, 256)            │             0 │
│ (GlobalMaxPooling1D)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 256)            │        65,792 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout)               │ (None, 256)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ (None, 1)              │           257 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 435,457 (1.66 MB)
 Trainable params: 435,457 (1.66 MB)
 Non-trainable params: 0 (0.00 B)
In [5]:
from keras.api.callbacks import ModelCheckpoint

# Save a full-model checkpoint after every epoch so any epoch can be restored.
checkpoint = ModelCheckpoint(filepath=output_dir + "/conv_weights.{epoch:02d}.keras")

conv_model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

conv_model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    batch_size=128,
    epochs=4,
    callbacks=[checkpoint],
)
Epoch 1/4
196/196 ━━━━━━━━━━━━━━━━━━━━ 10s 48ms/step - accuracy: 0.6241 - loss: 0.6084 - val_accuracy: 0.8735 - val_loss: 0.3017
Epoch 2/4
196/196 ━━━━━━━━━━━━━━━━━━━━ 9s 44ms/step - accuracy: 0.8978 - loss: 0.2526 - val_accuracy: 0.8906 - val_loss: 0.2637
Epoch 3/4
196/196 ━━━━━━━━━━━━━━━━━━━━ 9s 45ms/step - accuracy: 0.9400 - loss: 0.1670 - val_accuracy: 0.8906 - val_loss: 0.2735
Epoch 4/4
196/196 ━━━━━━━━━━━━━━━━━━━━ 9s 46ms/step - accuracy: 0.9648 - loss: 0.1062 - val_accuracy: 0.8894 - val_loss: 0.3019
Out[5]:
<keras.src.callbacks.history.History at 0x10830c080>
In [6]:
# Epoch 2 had the lowest validation loss in the training log above,
# so restore that checkpoint before the final evaluation.
best_checkpoint = output_dir + "/conv_weights.02.keras"
conv_model.load_weights(best_checkpoint)
conv_model.evaluate(X_valid, y_valid)
782/782 ━━━━━━━━━━━━━━━━━━━━ 6s 8ms/step - accuracy: 0.8874 - loss: 0.2689
Out[6]:
[0.2637385427951813, 0.89055997133255]
In [7]:
import matplotlib.pyplot as plt

# Distribution of predicted probabilities on the validation set; the vertical
# line marks the 0.5 decision threshold for the binary sentiment label.
y_hat = conv_model.predict(X_valid)
plt.hist(y_hat)
plt.title("Validation prediction distribution")
plt.xlabel("Predicted probability of positive sentiment")
plt.ylabel("Review count")
_ = plt.axvline(x=0.5, color="orange")
782/782 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step