Skip to content

TypeError: Could not locate class 'NERModel'. Make sure custom classes are decorated with @keras.saving.register_keras_serializable() #430

@raphael10-collab

Description

@raphael10-collab

Hi!

When trying to convert the keras model to json model I get this error :

(.fdenv) (base) raphy@raohy:~/frugally-deep$ python keras_export/convert_model.py ./ner_transformers/ner_model.keras \
> ./ner_transformers/ner_model.json
2025-02-08 12:44:02.641134: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-08 12:44:02.643925: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-08 12:44:02.652688: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
E0000 00:00:1739015042.667179   11429 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1739015042.671586   11429 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-08 12:44:02.686459: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
loading ./ner_transformers/ner_model.keras
Traceback (most recent call last):
  File "/home/raphy/frugally-deep/keras_export/convert_model.py", line 715, in <module>
    main()
  File "/home/raphy/frugally-deep/keras_export/convert_model.py", line 711, in main
    convert(args.input_path, args.output_path, args.no_tests)
  File "/home/raphy/frugally-deep/keras_export/convert_model.py", line 692, in convert
    model = load_model(in_path, compile=False)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/raphy/frugally-deep/.fdenv/lib/python3.12/site-packages/keras/src/saving/saving_api.py", line 189, in load_model
    return saving_lib.load_model(
           ^^^^^^^^^^^^^^^^^^^^^^
  File "/home/raphy/frugally-deep/.fdenv/lib/python3.12/site-packages/keras/src/saving/saving_lib.py", line 367, in load_model
    return _load_model_from_fileobj(
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/raphy/frugally-deep/.fdenv/lib/python3.12/site-packages/keras/src/saving/saving_lib.py", line 444, in _load_model_from_fileobj
    model = _model_from_config(
            ^^^^^^^^^^^^^^^^^^^
  File "/home/raphy/frugally-deep/.fdenv/lib/python3.12/site-packages/keras/src/saving/saving_lib.py", line 433, in _model_from_config
    model = deserialize_keras_object(
            ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/raphy/frugally-deep/.fdenv/lib/python3.12/site-packages/keras/src/saving/serialization_lib.py", line 694, in deserialize_keras_object
    cls = _retrieve_class_or_fn(
          ^^^^^^^^^^^^^^^^^^^^^^
  File "/home/raphy/frugally-deep/.fdenv/lib/python3.12/site-packages/keras/src/saving/serialization_lib.py", line 803, in _retrieve_class_or_fn
    raise TypeError(
TypeError: Could not locate class 'NERModel'. Make sure custom classes are decorated with `@keras.saving.register_keras_serializable()`. Full object config: {'module': None, 'class_name': 'NERModel', 'config': {'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float32'}, 'registered_name': None}}, 'registered_name': 'Custom>NERModel', 'build_config': {'input_shape': [32, 47]}, 'compile_config': None}
(.fdenv) (base) raphy@raohy:~/frugally-deep$ 

This is the text_classification_with_transformer.py taken from here : https://keras.io/examples/nlp/ner_transformers/

(.fdenv) (base) raphy@raohy:~/frugally-deep$ nano ./ner_transformers/text_classification_with_transformer.py


import os

os.environ["KERAS_BACKEND"] = "tensorflow"

import keras
from keras import ops
import numpy as np
import tensorflow as tf
from keras import layers
#from keras.models import Model
from datasets import load_dataset
from collections import Counter
from conlleval import evaluate

@keras.saving.register_keras_serializable()
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = keras.layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        self.ffn = keras.Sequential(
            [
                keras.layers.Dense(ff_dim, activation="relu"),
                keras.layers.Dense(embed_dim),
            ]
        )
        self.layernorm1 = keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = keras.layers.Dropout(rate)
        self.dropout2 = keras.layers.Dropout(rate)

    def call(self, inputs, training=False):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)


@keras.saving.register_keras_serializable()
class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        self.token_emb = keras.layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        self.pos_emb = keras.layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, inputs):
        maxlen = ops.shape(inputs)[-1]
        positions = ops.arange(start=0, stop=maxlen, step=1)
        position_embeddings = self.pos_emb(positions)
        token_embeddings = self.token_emb(inputs)
        return token_embeddings + position_embeddings


@keras.saving.register_keras_serializable()
class NERModel(keras.Model):
    def __init__(
        self, num_tags, vocab_size, maxlen=128, embed_dim=32, num_heads=2, ff_dim=32
    ):
        super().__init__()
        self.embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
        self.transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
        self.dropout1 = layers.Dropout(0.1)
        self.ff = layers.Dense(ff_dim, activation="relu")
        self.dropout2 = layers.Dropout(0.1)
        self.ff_final = layers.Dense(num_tags, activation="softmax")

    def call(self, inputs, training=False):
        x = self.embedding_layer(inputs)
        x = self.transformer_block(x)
        x = self.dropout1(x, training=training)
        x = self.ff(x)
        x = self.dropout2(x, training=training)
        x = self.ff_final(x)
        return x


conll_data = load_dataset("conll2003")


def export_to_file(export_file_path, data):
    with open(export_file_path, "w") as f:
        for record in data:
            ner_tags = record["ner_tags"]
            tokens = record["tokens"]
            if len(tokens) > 0:
                f.write(
                    str(len(tokens))
                    + "\t"
                    + "\t".join(tokens)
                    + "\t"
                    + "\t".join(map(str, ner_tags))
                    + "\n"
                )


os.mkdir("data")
export_to_file("./data/conll_train.txt", conll_data["train"])
export_to_file("./data/conll_val.txt", conll_data["validation"])


def make_tag_lookup_table():
    iob_labels = ["B", "I"]
    ner_labels = ["PER", "ORG", "LOC", "MISC"]
    all_labels = [(label1, label2) for label2 in ner_labels for label1 in iob_labels]
    all_labels = ["-".join([a, b]) for a, b in all_labels]
    all_labels = ["[PAD]", "O"] + all_labels
    return dict(zip(range(0, len(all_labels) + 1), all_labels))


mapping = make_tag_lookup_table()
print(mapping)


all_tokens = sum(conll_data["train"]["tokens"], [])
all_tokens_array = np.array(list(map(str.lower, all_tokens)))

counter = Counter(all_tokens_array)
print(len(counter))

num_tags = len(mapping)
vocab_size = 20000

# We only take (vocab_size - 2) most commons words from the training data since
# the `StringLookup` class uses 2 additional tokens - one denoting an unknown
# token and another one denoting a masking token
vocabulary = [token for token, count in counter.most_common(vocab_size - 2)]

# The StringLook class will convert tokens to token IDs
lookup_layer = keras.layers.StringLookup(vocabulary=vocabulary)


train_data = tf.data.TextLineDataset("./data/conll_train.txt")
val_data = tf.data.TextLineDataset("./data/conll_val.txt")


print(list(train_data.take(1).as_numpy_iterator()))


def map_record_to_training_data(record):
    record = tf.strings.split(record, sep="\t")
    length = tf.strings.to_number(record[0], out_type=tf.int32)
    tokens = record[1 : length + 1]
    tags = record[length + 1 :]
    tags = tf.strings.to_number(tags, out_type=tf.int64)
    tags += 1
    return tokens, tags


def lowercase_and_convert_to_ids(tokens):
    tokens = tf.strings.lower(tokens)
    return lookup_layer(tokens)

# We use `padded_batch` here because each record in the dataset has a
# different length.
batch_size = 32
train_dataset = (
    train_data.map(map_record_to_training_data)
    .map(lambda x, y: (lowercase_and_convert_to_ids(x), y))
    .padded_batch(batch_size)
)
val_dataset = (
    val_data.map(map_record_to_training_data)
    .map(lambda x, y: (lowercase_and_convert_to_ids(x), y))
    .padded_batch(batch_size)
)


ner_model = NERModel(num_tags, vocab_size, embed_dim=32, num_heads=4, ff_dim=64)


@keras.saving.register_keras_serializable()
class CustomNonPaddingTokenLoss(keras.losses.Loss):
    def __init__(self, name="custom_ner_loss"):
        super().__init__(name=name)

    def call(self, y_true, y_pred):
        loss_fn = keras.losses.SparseCategoricalCrossentropy(
            from_logits=False, reduction=None
        )
        loss = loss_fn(y_true, y_pred)
        mask = ops.cast((y_true > 0), dtype="float32")
        loss = loss * mask
        return ops.sum(loss) / ops.sum(mask)


loss = CustomNonPaddingTokenLoss()

tf.config.run_functions_eagerly(True)
ner_model.compile(optimizer="adam", loss=loss)
ner_model.fit(train_dataset, epochs=10)


ner_model.save('ner_model.keras')


def tokenize_and_convert_to_ids(text):
    tokens = text.split()
    return lowercase_and_convert_to_ids(tokens)


# Sample inference using the trained model
sample_input = tokenize_and_convert_to_ids(
    "eu rejects german call to boycott british lamb"
)
sample_input = ops.reshape(sample_input, [1, -1])
print(sample_input)

output = ner_model.predict(sample_input)
prediction = np.argmax(output, axis=-1)[0]
prediction = [mapping[i] for i in prediction]

# eu -> B-ORG, german -> B-MISC, british -> B-MISC
print(prediction)


def calculate_metrics(dataset):
    all_true_tag_ids, all_predicted_tag_ids = [], []

    for x, y in dataset:
        output = ner_model.predict(x, verbose=0)
        predictions = ops.argmax(output, axis=-1)
        predictions = ops.reshape(predictions, [-1])

        true_tag_ids = ops.reshape(y, [-1])

        mask = (true_tag_ids > 0) & (predictions > 0)
        true_tag_ids = true_tag_ids[mask]
        predicted_tag_ids = predictions[mask]

        all_true_tag_ids.append(true_tag_ids)
        all_predicted_tag_ids.append(predicted_tag_ids)

    all_true_tag_ids = np.concatenate(all_true_tag_ids)
    all_predicted_tag_ids = np.concatenate(all_predicted_tag_ids)

    predicted_tags = [mapping[tag] for tag in all_predicted_tag_ids]
    real_tags = [mapping[tag] for tag in all_true_tag_ids]

    evaluate(real_tags, predicted_tags)


calculate_metrics(val_dataset)

The training and the saving of the keras model didn't have, apparently, any issues :

(.fdenv) (base) raphy@raohy:~/frugally-deep/ner_transformers$ python text_classification_with_transformer.py 
2025-02-08 12:30:46.573523: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-08 12:30:46.576838: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-08 12:30:46.585416: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
E0000 00:00:1739014246.599707    8699 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1739014246.604083    8699 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-08 12:30:46.619626: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
{0: '[PAD]', 1: 'O', 2: 'B-PER', 3: 'I-PER', 4: 'B-ORG', 5: 'I-ORG', 6: 'B-LOC', 7: 'I-LOC', 8: 'B-MISC', 9: 'I-MISC'}
21009
2025-02-08 12:31:00.378098: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2025-02-08 12:31:00.438756: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
[b'9\tEU\trejects\tGerman\tcall\tto\tboycott\tBritish\tlamb\t.\t3\t0\t7\t0\t0\t0\t7\t0\t0']
Epoch 1/10
    439/Unknown 60s 135ms/step - loss: 0.87492025-02-08 12:32:00.323589: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
/home/raphy/frugally-deep/.fdenv/lib/python3.12/site-packages/keras/src/trainers/epoch_iterator.py:151: UserWarning: Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches. You may need to use the `.repeat()` function when building your dataset.
  self._interrupted_warning()
439/439 ━━━━━━━━━━━━━━━━━━━━ 60s 135ms/step - loss: 0.8743
Epoch 2/10
439/439 ━━━━━━━━━━━━━━━━━━━━ 59s 134ms/step - loss: 0.2881 
Epoch 3/10
439/439 ━━━━━━━━━━━━━━━━━━━━ 0s 135ms/step - loss: 0.16002025-02-08 12:33:58.672932: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
439/439 ━━━━━━━━━━━━━━━━━━━━ 59s 135ms/step - loss: 0.1600
Epoch 4/10
439/439 ━━━━━━━━━━━━━━━━━━━━ 64s 145ms/step - loss: 0.1197 
Epoch 5/10
439/439 ━━━━━━━━━━━━━━━━━━━━ 66s 150ms/step - loss: 0.0971 
Epoch 6/10
439/439 ━━━━━━━━━━━━━━━━━━━━ 66s 151ms/step - loss: 0.0797 
Epoch 7/10
439/439 ━━━━━━━━━━━━━━━━━━━━ 0s 152ms/step - loss: 0.06492025-02-08 12:38:21.459261: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
439/439 ━━━━━━━━━━━━━━━━━━━━ 67s 152ms/step - loss: 0.0649
Epoch 8/10
439/439 ━━━━━━━━━━━━━━━━━━━━ 67s 153ms/step - loss: 0.0544 
Epoch 9/10
439/439 ━━━━━━━━━━━━━━━━━━━━ 67s 152ms/step - loss: 0.0472 
Epoch 10/10
439/439 ━━━━━━━━━━━━━━━━━━━━ 66s 151ms/step - loss: 0.0450 
tf.Tensor([[  988 10950   204   628     6  3938   215  5773]], shape=(1, 8), dtype=int64)
/home/raphy/frugally-deep/.fdenv/lib/python3.12/site-packages/tensorflow/python/data/ops/structured_function.py:258: UserWarning: Even though the `tf.config.experimental_run_functions_eagerly` option is set, this option does not apply to tf.data functions. To force eager execution of tf.data functions, please use `tf.data.experimental.enable_debug_mode()`.
  warnings.warn(
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 44ms/step
['B-ORG', 'O', 'B-MISC', 'O', 'O', 'O', 'B-MISC', 'O']
processed 51362 tokens with 5942 phrases; found: 5638 phrases; correct: 3921.
accuracy:  64.45%; (non-O)
accuracy:  93.26%; precision:  69.55%; recall:  65.99%; FB1:  67.72
              LOC: precision:  81.44%; recall:  81.00%; FB1:  81.22  1827
             MISC: precision:  70.98%; recall:  68.98%; FB1:  69.97  896
              ORG: precision:  58.78%; recall:  62.64%; FB1:  60.65  1429
              PER: precision:  64.40%; recall:  51.95%; FB1:  57.51  1486

How to make it work?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions