From 61e4af020dfc5f4ddc95269174d04e1d424cade1 Mon Sep 17 00:00:00 2001
From: newbie
Date: Tue, 24 Jun 2025 23:10:53 +0800
Subject: [PATCH] Refactor data saving and visualization functions; add metrics tracking in Qnn model

---
Review notes (this section is ignored when the patch is applied):
- Qnn.get_cmn() returns the normalised matrix self.cmn; it used to return
  self.cm, so cmn.xlsx and cmn.png repeated the raw counts.
- draw_pca_3d() closes its figure like the other draw_* helpers.
- draw_and_save_cm() reads the label column as the index, converts the
  cells to float before imshow, draws a single panel and prints counts as
  integers and rates with two decimals.
- precision/recall/F1 pass zero_division=0: same values as the default,
  without a warning on every epoch in which a class is never predicted.
- Comments on added lines are in English; context and removed lines are
  left untouched so that they still match the old files.
- NOTE(review): line breaks, indentation and blank context lines were
  reconstructed from a whitespace-collapsed copy of this patch, and the
  index blob ids are still those of the original commit. Check the hunks
  against the tree (git apply --check) before applying.

 Qfunctions/loaData.py    |  13 +--
 Qfunctions/saveToxlsx.py | 184 ++++++++++++++++++++++++++++++++++++++-
 Qtorch/Models/Qnn.py     |  38 +++++++--
 main.py                  |  28 ++++---
 4 files changed, 233 insertions(+), 30 deletions(-)

diff --git a/Qfunctions/loaData.py b/Qfunctions/loaData.py
index 0024c1c..75abac6 100644
--- a/Qfunctions/loaData.py
+++ b/Qfunctions/loaData.py
@@ -62,14 +62,13 @@ def load_xlsx(fileName, labelName, max_row_length = 1000, fill_rule = None):
     # 提取偶数列
     features = df.iloc[0:, 1::2]
     # 复制 features DataFrame
-    features_copy = features.copy()
+    # features_copy = features.copy()
     # 使用 pd.concat 来追加副本到原始 DataFrame
-    features = pd.concat([features, features_copy], ignore_index=True, axis=1)
+    # features = pd.concat([features, features_copy], ignore_index=True, axis=1)
 
     # 计算变化率
     # first_value = features.iloc[0, :] # 获取第一行的数据
     # features_pct_change = (features - first_value) / first_value
-    # features = features_pct_change
 
     features.dropna(inplace=True)
 
@@ -77,11 +76,15 @@ def load_xlsx(fileName, labelName, max_row_length = 1000, fill_rule = None):
 
     features = features.T
 
-    # 补全每一行到指定长度
+    # Pad every row to the requested length
     features = features.apply(lambda row: fill_to_len(row, max_row_length, fill_rule), axis=1)
 
+    # Number of columns actually present after padding
+    actual_columns = features.shape[1]
+
     features['label'] = labelName
-    features.columns = [f'feature{i+1}' for i in range(max_row_length)] + ['label']
+    # Build the column names from the actual column count
+    features.columns = [f'feature{i+1}' for i in range(actual_columns)] + ['label']
 
     return features
 
diff --git a/Qfunctions/saveToxlsx.py b/Qfunctions/saveToxlsx.py
index e656808..717f431 100644
--- a/Qfunctions/saveToxlsx.py
+++ b/Qfunctions/saveToxlsx.py
@@ -1,7 +1,185 @@
 import os
+import pandas as pd
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+import numpy as np
 
 def save_to_xlsx(project_name, file_name, data):
-    os.makedirs(f'Result/{project_name}', exist_ok=True)
-    data.to_excel(f'Result/{project_name}/{file_name}.xlsx', index=True)
-    print("Save successed to " + f'Result/{project_name}/{file_name}.xlsx')
+    """Write data to Result/{project_name}/{file_name}.xlsx, then draw its picture."""
+    folder_path = f'Result/{project_name}'
+    os.makedirs(folder_path, exist_ok=True)
+    data.to_excel(f'{folder_path}/{file_name}.xlsx', index=True)
+    print("Save successed to " + f'{folder_path}/{file_name}.xlsx')
+    save_to_pic(project_name=project_name, file_name=file_name)
     return
+
+def save_to_pic(project_name, file_name):
+    """Draw the picture(s) matching file_name from the workbook in Result/{project_name}."""
+    os.makedirs(f'Result/{project_name}', exist_ok=True)
+    if file_name == 'pca_2d':
+        draw_pca_2d(f'Result/{project_name}/{file_name}.xlsx')
+        print("Save successed to " + f'Result/{project_name}/{file_name}.png')
+    elif file_name == 'pca_3d':
+        draw_pca_3d(f'Result/{project_name}/{file_name}.xlsx')
+        print("Save successed to " + f'Result/{project_name}/{file_name}.png')
+    elif file_name == 'acc_and_loss':
+        draw_epoch_data(f'Result/{project_name}/{file_name}.xlsx')
+        draw_last_epoch_bar_chart(f'Result/{project_name}/{file_name}.xlsx')
+        print("Save successed to line graph and bar graph")
+    elif file_name == 'cm':
+        draw_and_save_cm(f'Result/{project_name}/{file_name}.xlsx')
+        print("Save successed cm")
+    elif file_name == 'cmn':
+        draw_and_save_cm(f'Result/{project_name}/{file_name}.xlsx')
+        print("Save successed cmn")
+    else:
+        print("unknow picture type")
+
+
+def draw_pca_2d(file_path):
+    """Scatter PC1 against PC2, coloured by label, and save a PNG next to the workbook."""
+    df = pd.read_excel(file_path)
+    plt.figure(figsize=(8, 6))
+    plt.scatter(df['PC1'], df['PC2'], c=df['labels'], cmap='viridis', edgecolor='k', alpha=0.6)
+    plt.xlabel('PC1')
+    plt.ylabel('PC2')
+    plt.title('2D PCA')
+    plt.colorbar(label='Labels')
+    plt.savefig(file_path.replace('.xlsx', '.png'))
+    plt.close()
+
+def draw_pca_3d(file_path):
+    """Scatter PC1, PC2 and PC3, coloured by label, and save a PNG next to the workbook."""
+    df = pd.read_excel(file_path)
+    fig = plt.figure(figsize=(8, 6))
+    ax = fig.add_subplot(111, projection='3d')
+    scatter = ax.scatter(df['PC1'], df['PC2'], df['PC3'], c=df['labels'], cmap='viridis', edgecolor='k', alpha=0.6)
+    ax.set_xlabel('PC1')
+    ax.set_ylabel('PC2')
+    ax.set_zlabel('PC3')
+    ax.set_title('3D PCA')
+    fig.colorbar(scatter, ax=ax, label='Labels')
+    plt.savefig(file_path.replace('.xlsx', '.png'))
+    # Close the figure like the other draw_* helpers so open figures do not pile up.
+    plt.close()
+
+def draw_epoch_data(file_path):
+    """Plot the per-epoch metrics from the workbook as line charts in a 2x3 grid."""
+    df = pd.read_excel(file_path)
+    epochs = df['epoch']
+    train_loss = df['train_loss']
+    train_accuracy = df['train_accuracy'] * 100
+    test_accuracy = df['test_accuracy'] * 100
+    f1_score = df['f1_score']
+    precision = df['precision']
+    recall = df['recall']
+
+    fig, axs = plt.subplots(2, 3, figsize=(18, 12))
+
+    # Line chart: training loss
+    axs[0, 0].plot(epochs, train_loss, 'b-', label='Train Loss')
+    axs[0, 0].set_xlabel('Epoch')
+    axs[0, 0].set_ylabel('Loss')
+    axs[0, 0].set_title('Training Loss over Epochs')
+    axs[0, 0].legend()
+
+    # Line chart: training and test accuracy
+    axs[0, 1].plot(epochs, train_accuracy, 'g-', label='Train Accuracy')
+    axs[0, 1].plot(epochs, test_accuracy, 'r-', label='Test Accuracy')
+    axs[0, 1].set_xlabel('Epoch')
+    axs[0, 1].set_ylabel('Accuracy (%)')
+    axs[0, 1].set_title('Train and Test Accuracy over Epochs')
+    axs[0, 1].legend()
+
+    # Line chart: F1 score
+    axs[0, 2].plot(epochs, f1_score, 'm-', label='F1 Score')
+    axs[0, 2].set_xlabel('Epoch')
+    axs[0, 2].set_ylabel('F1 Score')
+    axs[0, 2].set_title('F1 Score over Epochs')
+    axs[0, 2].legend()
+
+    # Line chart: precision
+    axs[1, 0].plot(epochs, precision, 'c-', label='Precision')
+    axs[1, 0].set_xlabel('Epoch')
+    axs[1, 0].set_ylabel('Precision')
+    axs[1, 0].set_title('Precision over Epochs')
+    axs[1, 0].legend()
+
+    # Line chart: recall
+    axs[1, 1].plot(epochs, recall, 'y-', label='Recall')
+    axs[1, 1].set_xlabel('Epoch')
+    axs[1, 1].set_ylabel('Recall')
+    axs[1, 1].set_title('Recall over Epochs')
+    axs[1, 1].legend()
+
+    # The sixth panel is not used; hide its axes.
+    axs[1, 2].axis('off')
+
+    plt.tight_layout()
+    plt.savefig(file_path.replace('.xlsx', '_epoch.png'))
+    plt.close()
+
+def draw_last_epoch_bar_chart(file_path):
+    """Draw the metrics of the last row of the workbook as a bar chart."""
+    df = pd.read_excel(file_path)
+    last_epoch_data = df.iloc[-1]
+
+    metrics = ['train_loss', 'train_accuracy', 'test_accuracy', 'f1_score', 'precision', 'recall']
+    values = [last_epoch_data[metric] for metric in metrics]
+    labels = ['Train Loss', 'Train Accuracy', 'Test Accuracy', 'F1 Score', 'Precision', 'Recall']
+
+    # Scale the two accuracy values by 100, as draw_epoch_data does for its percent axis.
+    values[1] *= 100 # Train Accuracy
+    values[2] *= 100 # Test Accuracy
+
+    plt.figure(figsize=(10, 6))
+    plt.bar(labels, values, color=['blue', 'green', 'red', 'magenta', 'cyan', 'yellow'])
+    plt.xlabel('Metrics')
+    plt.ylabel('Values')
+    plt.title('Last Epoch Metrics')
+    plt.ylim(bottom=0)
+
+    # Print each value just above its bar.
+    for i, value in enumerate(values):
+        plt.text(i, value + 0.01, f'{value:.2f}', ha='center')
+
+    plt.tight_layout()
+    plt.savefig(file_path.replace('.xlsx', '_last_epoch_bar.png'))
+    plt.close()
+
+def draw_and_save_cm(file_path):
+    """Plot the confusion matrix stored in an Excel workbook and save it as a PNG.
+
+    The first column is read as the row labels, matching the index=True layout
+    that save_to_xlsx writes; the remaining columns form the matrix.
+    """
+    # Read the label column as the index so that only the numeric cells remain.
+    df_cm = pd.read_excel(file_path, index_col=0)
+    labels = df_cm.columns.tolist()
+
+    # Text labels would otherwise leave an object array, which imshow cannot draw.
+    cm = df_cm.to_numpy(dtype=float)
+    # Raw counts are printed as integers, normalised rates with two decimals.
+    is_count = bool(np.all(cm == np.round(cm)))
+
+    # One panel only: the workbook holds a single matrix.
+    fig, ax = plt.subplots(figsize=(8, 6))
+    ax.imshow(cm, interpolation='nearest', cmap='Blues')
+    ax.set_title('Confusion Matrix')
+    ax.set_xlabel('Predicted')
+    ax.set_ylabel('True')
+    ax.set_xticks(np.arange(len(labels)))
+    ax.set_yticks(np.arange(len(labels)))
+    ax.set_xticklabels(labels)
+    ax.set_yticklabels(labels)
+
+    # Annotate every cell with its value.
+    for i in range(len(labels)):
+        for j in range(len(labels)):
+            cell = f'{int(cm[i, j])}' if is_count else f'{cm[i, j]:.2f}'
+            ax.text(j, i, cell, ha='center', va='center')
+
+    # Tighten the layout and save the image.
+    plt.tight_layout()
+    plt.savefig(file_path.replace('.xlsx', '.png'))
+    plt.close()
diff --git a/Qtorch/Models/Qnn.py b/Qtorch/Models/Qnn.py
index 55ac670..ddb3218 100644
--- a/Qtorch/Models/Qnn.py
+++ b/Qtorch/Models/Qnn.py
@@ -2,7 +2,7 @@ import torch
 import torch.nn as nn
 import pandas as pd
 from sklearn.decomposition import PCA
-from sklearn.metrics import confusion_matrix
+from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
 from torch.utils.data import DataLoader, TensorDataset
 
 # from Qfunctions.divSet import divSet as ds
@@ -27,20 +27,22 @@ class Qnn(nn.Module):
             'epoch': [],
             'train_loss': [],
             'train_accuracy': [],
-            'test_accuracy': []
+            'test_accuracy': [],
+            'precision': [],
+            'recall': [],
+            'f1_score': []
         }
 
         self.pca_2d, self.pca_3d = None, None
 
 
-
     def __prepare_data(self):
         # 将data转换为tensor形式
-        X_train_tensor = torch.tensor(self.X_train, dtype=torch.float32).unsqueeze(1)
+        X_train_tensor = torch.tensor(self.X_train, dtype=torch.float32)
         self.y_train = self.LABEL_ENCODER.fit_transform(self.y_train)
         y_train_tensor = torch.tensor(self.y_train, dtype=torch.long)
 
-        X_test_tensor = torch.tensor(self.X_test, dtype=torch.float32).unsqueeze(1)
+        X_test_tensor = torch.tensor(self.X_test, dtype=torch.float32)
         self.y_test = self.LABEL_ENCODER.transform(self.y_test)
         y_test_tensor = torch.tensor(self.y_test, dtype=torch.long)
 
@@ -57,7 +59,7 @@ class Qnn(nn.Module):
 
         model = self.to(self.DEVICE)
         criterion = nn.CrossEntropyLoss()
-        optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)
+        optimizer = torch.optim.Adam(model.parameters(), lr=0.00001, weight_decay=1e-5)
         scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
         best_test_accuracy = 0
         patience = 100
@@ -106,12 +108,24 @@ class Qnn(nn.Module):
                     all_prob.extend(prob.cpu().numpy())
 
             test_accuracy = correct_test / total_test
-            print(f'Epoch [{epoch+1}/{epochs_times}], Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy * 100:.2f}%, Test Accuracy: {test_accuracy*100:.2f}%')
+            f1 = f1_score(all_labels, all_predicted, average='macro', zero_division=0)
+            precision = precision_score(all_labels, all_predicted, average='macro', zero_division=0)
+            recall = recall_score(all_labels, all_predicted, average='macro', zero_division=0)
+
+            if (epoch + 1) % 10 == 0:
+                print('===============================================')
+                print(f'Epoch [{epoch + 1} / {epochs_times}]:')
+                print(f'Train Accuracy: {train_accuracy * 100:.2f}%, Test Accuracy: {test_accuracy*100:.2f}%, Loss: {train_loss:.4f}')
+                print(f'Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score:{f1:.4f}, ')
+                print('===============================================')
 
             self.epoch_data['epoch'].append(epoch+1)
             self.epoch_data['train_loss'].append(train_loss)
             self.epoch_data['train_accuracy'].append(train_accuracy)
             self.epoch_data['test_accuracy'].append(test_accuracy)
+            self.epoch_data['precision'].append(precision)
+            self.epoch_data['recall'].append(recall)
+            self.epoch_data['f1_score'].append(f1)
 
             scheduler.step(train_loss)
 
@@ -125,7 +139,9 @@ class Qnn(nn.Module):
                     print(f"Early stopping at epoch {epoch+1}")
                     break
 
-        self.cm = confusion_matrix(all_labels, all_predicted, normalize='true')
+        self.cm = confusion_matrix(all_labels, all_predicted)
+        self.cmn = confusion_matrix(all_labels, all_predicted, normalize='true')
+        print(self.cm)
         return
 
 
@@ -147,11 +163,15 @@ class Qnn(nn.Module):
         principalComponents = pca_3d.fit_transform(self.X_train)
         df_pca3d = pd.DataFrame(data=principalComponents, columns=['PC1', 'PC2', 'PC3'])
         df_pca3d['labels'] = self.y_train
-        
+
         return df_pca2d, df_pca3d
 
     def get_cm(self):
         return pd.DataFrame(self.cm, columns=self.labels, index=self.labels)
+
+    def get_cmn(self):
+        """Return the confusion matrix normalised over the true labels as a labelled DataFrame."""
+        return pd.DataFrame(self.cmn, columns=self.labels, index=self.labels)
 
     def get_epoch_data(self):
         return pd.DataFrame(self.epoch_data)
diff --git a/main.py b/main.py
index 1be6a11..d2f256c 100644
--- a/main.py
+++ b/main.py
@@ -1,40 +1,42 @@
-from Qtorch.Models.Qcnn import QCNN
+from Qtorch.Models.Qmlp import Qmlp
 from Qfunctions.divSet import divSet
 from Qfunctions.loaData import load_data
 from Qfunctions.saveToxlsx import save_to_xlsx as save_to_xlsx
 
 def main():
     # 输入元数据文件夹名称
-    projet_name = '20241228 Write'
+    projet_name = '20250623 FHH-write'
     # 请在[]内输入每一个分类的名称
-    label_names = ['I', 'L', 'O', 'V', 'E', 'F', 'J', 'U', 'T']
+    label_names = ['5', '2', '0', 'M', 'J', 'U']
     print(label_names)
     data = load_data(projet_name, label_names, isDir=False, fileClass='xlsx')
     X_train, X_test, y_train, y_test, encoder = divSet(
         data=data, labels=label_names, test_size= 0.3
     )
 
-    # model = Qmlp(
-    #     X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test,
-    #     hidden_layers = [128],
-    #     dropout_rate=0
-    # )
+    model = Qmlp(
+        X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test,
+        hidden_layers = [16],
+        dropout_rate=0
+    )
 
-    model = QCNN(
-        X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test,
-        dropout_rate=0
-    )
+    # model = QCNN(
+    #     X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test,
+    #     dropout_rate=0
+    # )
     pca_2d, pca_3d = model.get_PCA()
     model.fit(300)
 
     cm = model.get_cm()
+    cmn = model.get_cmn()
     epoch_data = model.get_epoch_data()
 
     save_to_xlsx(project_name=projet_name, file_name="pca_2d", data=pca_2d)
     save_to_xlsx(project_name=projet_name, file_name="pca_3d", data=pca_3d)
-    save_to_xlsx(project_name=projet_name, file_name="cm", data=cm )
+    save_to_xlsx(project_name=projet_name, file_name="cm", data=cm)
+    save_to_xlsx(project_name=projet_name, file_name="cmn", data=cmn)
     save_to_xlsx(project_name=projet_name, file_name="acc_and_loss", data=epoch_data)
     print("Done")