1. For an introduction to the CoNLL-2003 dataset and its preprocessing, see the post below
CoNLL-2003 dataset download and preprocessing (茫茫人海一粒沙's blog, CSDN)
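For reference, here is a minimal sketch of the data format that the rest of this post assumes the preprocessing produces: word-index sequences padded to a fixed length, tag indices one-hot encoded, and everything stored in an .npz file under the keys that load_dataset() below expects. The id lists, the num_tags value, and the output path in this snippet are made-up placeholders; the real values come from the linked post.

import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

max_len = 64
num_tags = 10  # assumption: tag vocabulary size (9 CoNLL-2003 BIO tags + padding)

# Made-up word/tag index sequences standing in for the real preprocessed data
sentence_ids = [[12, 85, 7], [3, 44, 90, 2, 15]]
tag_ids = [[1, 2, 0], [0, 0, 3, 0, 0]]

X = pad_sequences(sentence_ids, maxlen=max_len, padding='post', value=0)
y = to_categorical(pad_sequences(tag_ids, maxlen=max_len, padding='post', value=0),
                   num_classes=num_tags)

# Toy example reusing the same arrays for every split, just to show the
# keys that load_dataset() reads from dataset/dataset.npz
np.savez('dataset/example_dataset.npz', train_X=X, train_y=y,
         valid_X=X, valid_y=y, test_X=X, test_y=y)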
2. Loading the preprocessed dataset
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Embedding, LSTM, Dense, TimeDistributed
import keras
from keras.callbacks import EarlyStopping, ModelCheckpoint
import numpy as np
from constant.config import *

def load_dataset():
    dataset = np.load('dataset/dataset.npz')
    train_X = dataset['train_X']
    train_y = dataset['train_y']
    valid_X = dataset['valid_X']
    valid_y = dataset['valid_y']
    test_X = dataset['test_X']
    test_y = dataset['test_y']
    return train_X, train_y, valid_X, valid_y, test_X, test_y
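A quick sanity check of what load_dataset() returns. The first dimensions below are illustrative and depend on the preprocessing; the 64 and num_tags dimensions follow the padding length and tag vocabulary used in this post.

train_X, train_y, valid_X, valid_y, test_X, test_y = load_dataset()
print(train_X.shape)  # e.g. (num_train_sentences, 64): padded word-index sequences
print(train_y.shape)  # e.g. (num_train_sentences, 64, num_tags): one-hot tags per token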
3. Creating the LSTM-based NER model
max_len = 64

def create_model():
    word2idx = load_dict('dataset/word2idx.json')
    tag2idx = load_dict('dataset/idx2Label.json')
    num_words = len(word2idx) + 1
    num_tags = len(tag2idx)
    # Define the model: embedding -> LSTM -> per-token softmax
    input_layer = Input(shape=(None,))
    embedding_layer = Embedding(input_dim=num_words, output_dim=60, input_length=max_len)(input_layer)
    lstm_layer = LSTM(units=50, return_sequences=True, dropout=0.5)(embedding_layer)
    output_layer = TimeDistributed(Dense(num_tags, activation="softmax"))(lstm_layer)
    model = Model(input_layer, output_layer)
    return model
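Before training, it can help to print a summary of the network. This assumes the word2idx.json and idx2Label.json files from the preprocessing step already exist, since create_model() reads them.

model = create_model()
model.summary()  # shows Embedding -> LSTM -> TimeDistributed(Dense) and parameter counts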
4. Training the model
def train(model, train_X, train_y, valid_X, valid_y):
    # Path and file name for the saved model
    model_path = './dataset/ner_model.h5'
    # Early-stopping callback
    early_stop = EarlyStopping(monitor='val_accuracy', patience=3, mode='max', verbose=1)
    # ModelCheckpoint callback: keep only the best model by validation accuracy
    checkpoint = ModelCheckpoint(model_path, monitor='val_accuracy', save_best_only=True, mode='max', verbose=1)
    # Compile and train the model
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    print(np.array(train_X).shape)
    print(np.array(train_y).shape)
    print(np.array(valid_X).shape)
    print(np.array(valid_y).shape)
    model.fit(train_X, train_y, batch_size=32, epochs=20,
              validation_data=(valid_X, valid_y), callbacks=[early_stop, checkpoint])
5. Testing the model
def test(test_X, test_y):
    model = keras.models.load_model('./dataset/ner_model.h5')
    # Evaluate the model; scores is [loss, accuracy]
    scores = model.evaluate(test_X, test_y, verbose=0)
    print("Test accuracy:", scores[1])
6. Methods for saving and loading dictionary files
def save_dict(d, file_path):
    import json
    # Save the dictionary to a JSON file
    with open(file_path, 'w') as f:
        json.dump(d, f)

def load_dict(path_file):
    import json
    # Load the dictionary back from the JSON file
    with open(path_file, 'r') as f:
        loaded_dict = json.load(f)
    return loaded_dict
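A quick usage example of the two helpers with a toy dictionary (the file name is just an example):

save_dict({'key1': 'value1', 'key2': 'value2'}, 'dataset/example.json')
loaded = load_dict('dataset/example.json')
print(loaded)  # Output: {'key1': 'value1', 'key2': 'value2'}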
7. The main method for training and testing the model
if __name__ == '__main__':
    train_X, train_y, valid_X, valid_y, test_X, test_y = load_dataset()
    model = create_model()
    # Note: the train and validation sets are merged for training here,
    # and the test set is passed in as the validation data.
    train(model, np.concatenate([train_X, valid_X]), np.concatenate([train_y, valid_y]), test_X, test_y)
    test(test_X, test_y)
    # predict()
Output: training ran for 12 epochs in total; accuracy reached 99.7% on the training set and roughly 98% on the test set.
577/577 [==============================] - ETA: 0s - loss: 0.0115 - accuracy: 0.9964
Epoch 10: val_accuracy did not improve from 0.98385
577/577 [==============================] - 16s 28ms/step - loss: 0.0115 - accuracy: 0.9964 - val_loss: 0.0570 - val_accuracy: 0.9835
Epoch 11/20
575/577 [============================>.] - ETA: 0s - loss: 0.0103 - accuracy: 0.9968
Epoch 11: val_accuracy did not improve from 0.98385
577/577 [==============================] - 16s 28ms/step - loss: 0.0103 - accuracy: 0.9968 - val_loss: 0.0598 - val_accuracy: 0.9836
Epoch 12/20
577/577 [==============================] - ETA: 0s - loss: 0.0095 - accuracy: 0.9970
Epoch 12: val_accuracy did not improve from 0.98385
577/577 [==============================] - 16s 28ms/step - loss: 0.0095 - accuracy: 0.9970 - val_loss: 0.0584 - val_accuracy: 0.9837
Epoch 12: early stopping
Test accuracy: 0.9838491082191467
8. Predicting with the model
def predict():
    # Example sentences to predict
    test_sentences = [
        "John Wilson works at Apple .",
        "I have a meeting with Peter Blackburn tomorrow.",
        "BRUSSELS",
        "Peter Blackburn",
        'EU rejects German call to boycott British lamb.',
        'The European Commission said on Thursday it disagreed with German advice to consumers to shun British'
    ]
    word2idx = load_dict('dataset/word2idx.json')
    tag2idx = load_dict('dataset/idx2Label.json')
    model = keras.models.load_model('./dataset/ner_model.h5')
    # Convert test sentences to numerical sequences
    test_sequences = [[word2idx.get(word.lower(), 0) for word in sentence.split()] for sentence in test_sentences]
    print('test_sequences:', test_sequences)
    test_sequences = tf.keras.preprocessing.sequence.pad_sequences(maxlen=max_len, sequences=test_sequences, padding='post', value=0)
    # Make predictions
    predictions = model.predict(test_sequences)
    # print(predictions)
    predicted_tags = tf.argmax(predictions, axis=-1)
    # Convert predicted tags back to labels
    predicted_labels = []
    for tags in predicted_tags:
        labels = [list(tag2idx.keys())[tag] for tag in tags if tag != 0]
        predicted_labels.append(labels)
    # Print the predicted labels
    for sentence, labels in zip(test_sentences, predicted_labels):
        print(f"Sentence: {sentence}")
        print(f"Predicted Labels: {labels}\n")
Output
Sentence: John Wilson works at Apple
Predicted Labels: ['I-PER', 'O', 'O', 'B-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']

Sentence: I have a meeting with Peter Blackburn tomorrow.
Predicted Labels: ['O', 'O', 'O', 'O', 'O', 'I-PER', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']

Sentence: BRUSSELS
Predicted Labels: ['B-LOC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']

Sentence: Peter Blackburn
Predicted Labels: ['I-PER', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']

Sentence: EU rejects German call to boycott British lamb.
Predicted Labels: ['B-ORG', 'O', 'B-MISC', 'O', 'O', 'O', 'B-MISC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']

Sentence: The European Commission said on Thursday it disagreed with German advice to consumers to shun British
Predicted Labels: ['O', 'B-MISC', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'B-MISC', 'O', 'O', 'O', 'O', 'O', 'B-MISC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
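The label lists above run to the padded length, not the true sentence length. If you only want labels for the real tokens, one option is to cut each prediction at the token count before decoding. A small sketch, assuming the variables inside predict() are still in scope:

# Decode only as many positions as the sentence has tokens
# (mirrors the decoding line used in predict() above)
for sentence, tags in zip(test_sentences, predicted_tags):
    n = len(sentence.split())
    labels = [list(tag2idx.keys())[tag] for tag in tags[:n]]
    print(sentence, '->', labels)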
9. Complete code
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Embedding, LSTM, Dense, TimeDistributed
import keras
from keras.callbacks import EarlyStopping, ModelCheckpoint
import numpy as np
from constant.config import *

def load_dataset():
    dataset = np.load('dataset/dataset.npz')
    train_X = dataset['train_X']
    train_y = dataset['train_y']
    valid_X = dataset['valid_X']
    valid_y = dataset['valid_y']
    test_X = dataset['test_X']
    test_y = dataset['test_y']
    return train_X, train_y, valid_X, valid_y, test_X, test_y

max_len = 64

def create_model():
    word2idx = load_dict('dataset/word2idx.json')
    tag2idx = load_dict('dataset/idx2Label.json')
    num_words = len(word2idx) + 1
    num_tags = len(tag2idx)
    # Define the model: embedding -> LSTM -> per-token softmax
    input_layer = Input(shape=(None,))
    embedding_layer = Embedding(input_dim=num_words, output_dim=60, input_length=max_len)(input_layer)
    lstm_layer = LSTM(units=50, return_sequences=True, dropout=0.5)(embedding_layer)
    output_layer = TimeDistributed(Dense(num_tags, activation="softmax"))(lstm_layer)
    model = Model(input_layer, output_layer)
    return model

def train(model, train_X, train_y, valid_X, valid_y):
    # Path and file name for the saved model
    model_path = './dataset/ner_model.h5'
    # Early-stopping callback
    early_stop = EarlyStopping(monitor='val_accuracy', patience=3, mode='max', verbose=1)
    # ModelCheckpoint callback: keep only the best model by validation accuracy
    checkpoint = ModelCheckpoint(model_path, monitor='val_accuracy', save_best_only=True, mode='max', verbose=1)
    # Compile and train the model
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    print(np.array(train_X).shape)
    print(np.array(train_y).shape)
    print(np.array(valid_X).shape)
    print(np.array(valid_y).shape)
    model.fit(train_X, train_y, batch_size=32, epochs=20,
              validation_data=(valid_X, valid_y), callbacks=[early_stop, checkpoint])
def save_dict(d, file_path):
    import json
    # Save the dictionary to a JSON file
    with open(file_path, 'w') as f:
        json.dump(d, f)

def load_dict(path_file):
    import json
    # Load the dictionary back from the JSON file
    with open(path_file, 'r') as f:
        loaded_dict = json.load(f)
    return loaded_dict

def test(test_X, test_y):
    model = keras.models.load_model('./dataset/ner_model.h5')
    # Evaluate the model; scores is [loss, accuracy]
    scores = model.evaluate(test_X, test_y, verbose=0)
    print("Test accuracy:", scores[1])

def predict():
    # Example sentences to predict
    test_sentences = [
        "John Wilson works at Apple .",
        "I have a meeting with Peter Blackburn tomorrow.",
        "BRUSSELS",
        "Peter Blackburn",
        'EU rejects German call to boycott British lamb.',
        'The European Commission said on Thursday it disagreed with German advice to consumers to shun British'
    ]
    word2idx = load_dict('dataset/word2idx.json')
    tag2idx = load_dict('dataset/idx2Label.json')
    model = keras.models.load_model('./dataset/ner_model.h5')
    # Convert test sentences to numerical sequences
    test_sequences = [[word2idx.get(word.lower(), 0) for word in sentence.split()] for sentence in test_sentences]
    print('test_sequences:', test_sequences)
    test_sequences = tf.keras.preprocessing.sequence.pad_sequences(maxlen=max_len, sequences=test_sequences, padding='post', value=0)
    # Make predictions
    predictions = model.predict(test_sequences)
    # print(predictions)
    predicted_tags = tf.argmax(predictions, axis=-1)
    # Convert predicted tags back to labels
    predicted_labels = []
    for tags in predicted_tags:
        labels = [list(tag2idx.keys())[tag] for tag in tags if tag != 0]
        predicted_labels.append(labels)
    # Print the predicted labels
    for sentence, labels in zip(test_sentences, predicted_labels):
        print(f"Sentence: {sentence}")
        print(f"Predicted Labels: {labels}\n")

if __name__ == '__main__':
    train_X, train_y, valid_X, valid_y, test_X, test_y = load_dataset()
    model = create_model()
    # Note: the train and validation sets are merged for training here,
    # and the test set is passed in as the validation data.
    train(model, np.concatenate([train_X, valid_X]), np.concatenate([train_y, valid_y]), test_X, test_y)
    test(test_X, test_y)
    # predict()