| import tensorflow as tf |
| from tensorflow.keras.layers import Input, Embedding, LayerNormalization, MultiHeadAttention, Dense, Add, Dropout, Layer |
| from tensorflow.keras.models import Model |
| from tensorflow.keras.optimizers import Adam |
| from tensorflow.keras.losses import SparseCategoricalCrossentropy |
| import numpy as np |
|
|
| class VoidChatModel(tf.keras.Model): |
def __init__(self, vocab_size, seq_len, num_layers=6, num_heads=8, emb_dim=512, mlp_dim=2048, dropout_rate=0.1):
    """Record the hyperparameters and create the model's sub-layers.

    Args:
        vocab_size: Used as the embedding table's input dimension and as
            the number of units in the output layer.
        seq_len: Stored on the instance; not otherwise used in this
            constructor.
        num_layers: How many ``TransformerBlock`` instances to create.
        num_heads: Forwarded to every ``TransformerBlock``.
        emb_dim: Embedding output dimension; also forwarded to every
            ``TransformerBlock``.
        mlp_dim: Forwarded to every ``TransformerBlock``.
        dropout_rate: Forwarded to every ``TransformerBlock``.
    """
    super().__init__()

    # Keep each hyperparameter reachable as an instance attribute.
    self.vocab_size = vocab_size
    self.seq_len = seq_len
    self.num_layers = num_layers
    self.num_heads = num_heads
    self.emb_dim = emb_dim
    self.mlp_dim = mlp_dim
    self.dropout_rate = dropout_rate

    # Lookup table from integer ids to emb_dim-sized vectors.
    self.embedding = Embedding(input_dim=vocab_size, output_dim=emb_dim)

    # One identically configured block per requested layer. Sub-layers are
    # created in the same order as before (embedding, blocks, output head).
    blocks = []
    for _ in range(num_layers):
        blocks.append(TransformerBlock(num_heads, emb_dim, mlp_dim, dropout_rate))
    self.transformer_blocks = blocks

    # Softmax-activated projection with one unit per vocabulary entry.
    self.output_layer = Dense(units=vocab_size, activation='softmax')
| |
| def call(self, input_ids, training=False): |
| |
| x = self.embedding(input_ids) |