From f6f839ebc09bcae5df3561d8434fac858998d19e Mon Sep 17 00:00:00 2001 From: newbieQQ Date: Sun, 29 Mar 2026 12:48:41 +0800 Subject: [PATCH] refactor: unify module structure and suppress training warnings - Move canonical implementations to Qfunctions layer (divSet.py, loadData.py, saveToXlsx.py) - Remove duplicate compatibility shims (loaData.py, saveToxlsx.py) - Remove redundant Qtorch/Functions/ directory - Add zero_division=0 to sklearn metrics to suppress UndefinedMetricWarning - Set matplotlib backend to Agg to eliminate Wayland/Qt warnings - Update all imports to use canonical module paths --- .vscode/launch.json | 15 --- Qfunctions/__init__.py | 5 + Qfunctions/divSet.py | 45 +++++++ Qfunctions/{loaData.py => loadData.py} | 135 ++++++++++--------- Qfunctions/saveToXlsx.py | 165 +++++++++++++++++++++++ Qfunctions/saveToxlsx.py | 173 ------------------------- Qtorch/Functions/__init__.py | 0 Qtorch/Functions/divSet.py | 28 ---- Qtorch/Models/Qmlp.py | 2 +- Qtorch/Models/Qnn.py | 18 +-- Qtorch/__init__.py | 1 - README.md | 4 +- main.py | 12 +- 13 files changed, 299 insertions(+), 304 deletions(-) delete mode 100644 .vscode/launch.json create mode 100644 Qfunctions/divSet.py rename Qfunctions/{loaData.py => loadData.py} (55%) create mode 100644 Qfunctions/saveToXlsx.py delete mode 100644 Qfunctions/saveToxlsx.py delete mode 100644 Qtorch/Functions/__init__.py delete mode 100644 Qtorch/Functions/divSet.py diff --git a/.vscode/launch.json b/.vscode/launch.json deleted file mode 100644 index 053641d..0000000 --- a/.vscode/launch.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "name": "Python Debugger: Current this project", - "type": "debugpy", - "request": "launch", - "program": "main.py", - "console": "integratedTerminal" - } - ] -} \ No newline at end of file diff --git a/Qfunctions/__init__.py b/Qfunctions/__init__.py index e69de29..659eaa8 100644 --- a/Qfunctions/__init__.py +++ b/Qfunctions/__init__.py @@ -0,0 +1,5 @@ +from .divSet import divSet +from .loadData import load_data +from .saveToXlsx import save_to_xlsx + +__all__ = ["divSet", "load_data", "save_to_xlsx"] diff --git a/Qfunctions/divSet.py b/Qfunctions/divSet.py new file mode 100644 index 0000000..565b64d --- /dev/null +++ b/Qfunctions/divSet.py @@ -0,0 +1,45 @@ +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler, LabelEncoder + + +def divSet(data, labels=None, test_size=0.2, random_state=None): + """Split data, scale features, and encode labels. + + This module is the canonical location for dataset splitting utilities. + """ + encoder = LabelEncoder() + + # 最后一列是标签 + X = data.iloc[:, :-1] + y = data.iloc[:, -1] + + if labels: + encoder.fit(labels) + else: + encoder.fit(y) + + # 优先使用分层抽样,尽量保证每个类别在训练集和测试集都出现。 + stratify_target = y if y.nunique() > 1 else None + try: + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=test_size, random_state=random_state, stratify=stratify_target + ) + except ValueError: + # 当样本过少等情况下分层失败,回退到普通随机划分。 + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=test_size, random_state=random_state + ) + + # 标准化特征 + scaler = StandardScaler() + X_train = scaler.fit_transform(X_train) + X_test = scaler.transform(X_test) + + # 编码标签 + y_train = encoder.transform(y_train.values) + y_test = encoder.transform(y_test.values) + + return X_train, X_test, y_train, y_test, encoder + + +__all__ = ["divSet"] diff --git a/Qfunctions/loaData.py b/Qfunctions/loadData.py similarity index 55% rename from Qfunctions/loaData.py rename to Qfunctions/loadData.py index 88c4638..cad7294 100644 --- a/Qfunctions/loaData.py +++ b/Qfunctions/loadData.py @@ -4,50 +4,55 @@ import pandas as pd STATIC_PATH = './Static' + # 从文件夹中读取所有xlsx文件,每个文件对应一个label # labelNames为label的名字,如果不提供则默认为文件名 def load_data(folder, labelNames, isDir=True, fileClass='xlsx'): # 检查folder参数 if folder is None: raise ValueError("The 'folder' parameter is required.") - + # 检查labelNames参数 if labelNames is None: raise ValueError("The 'labelNames' parameter is required if 'folder' does not contain labels.") - + folder = os.path.join(STATIC_PATH, folder) - - # 看看有没有元数据文件夹 + + # 看看有没有元数据文件夹 if not os.path.isdir(folder): raise ValueError(f"The folder '{folder}' does not exist.") - data = None if not isDir: data = load_from_file(folder=folder, labelNames=labelNames, fileClass=fileClass) else: data = load_from_folder(folder=folder, labelNames=labelNames, fileClass=fileClass) + print(data) return data + def load_from_folder(folder, labelNames, fileClass): - all_features = [] - fileClass = '.' + fileClass - for labelName in labelNames: - subfolder = os.path.join(folder, labelName) - if os.path.exists(subfolder) and os.path.isdir(subfolder): - fileNames = [f for f in os.listdir(subfolder) if f.endswith(fileClass)] - max_row_length = get_max_row_len(subfolder, fileNames) - features = [] - for fileName in fileNames: - file_path = os.path.join(subfolder, fileName) - features.append(load_xlsx(file_path, labelName, max_row_length, 'zero')) - if features: - all_features.append(pd.concat(features, ignore_index=True)) - # 将所有标签的数据合并 - return pd.concat(all_features, ignore_index=True) + all_features = [] + fileClass = '.' + fileClass + for labelName in labelNames: + subfolder = os.path.join(folder, labelName) + if os.path.exists(subfolder) and os.path.isdir(subfolder): + fileNames = [f for f in os.listdir(subfolder) if f.endswith(fileClass)] + max_row_length = get_max_row_len(subfolder, fileNames) + features = [] + for fileName in fileNames: + file_path = os.path.join(subfolder, fileName) + features.append(load_xlsx(file_path, labelName, max_row_length, 'zero')) + if features: + all_features.append(pd.concat(features, ignore_index=True)) + + # 将所有标签的数据合并 + return pd.concat(all_features, ignore_index=True) + def load_from_file(folder, labelNames, fileClass): - # 构建期望的文件名(label + .扩展名),并在目录中进行健壮匹配(去除零宽字符、Unicode 规范化、大小写不敏感) + # 构建期望的文件名(label + .扩展名),并在目录中进行健壮匹配 + # (去除零宽字符、Unicode 规范化、大小写不敏感) expected_names = [f"{labelName}.{fileClass}" for labelName in labelNames] actual_file_names = [] @@ -75,53 +80,45 @@ def load_from_file(folder, labelNames, fileClass): file_path = os.path.join(folder, fileName) features = load_xlsx(file_path, labelNames[i], max_row_length, 'zero') all_features.append(features) - return pd.concat(all_features, ignore_index = True) + return pd.concat(all_features, ignore_index=True) -def load_xlsx(fileName, labelName, max_row_length = 1000, fill_rule = None): - df = pd.read_excel(fileName) +def load_xlsx(fileName, labelName, max_row_length=1000, fill_rule=None): + df = pd.read_excel(fileName) - # 提取偶数列 - features = df.iloc[0:, 1::2] - # ## 复制 features DataFrame - # features_copy = features.copy() - # ## 使用 pd.concat 来追加副本到原始 DataFrame - # features = pd.concat([features, features_copy], ignore_index=True, axis=1) + # 提取偶数列 + features = df.iloc[0:, 1::2] - # 计算变化率 - # first_value = features.iloc[0, :] # 获取第一行的数据 - # features_pct_change = (features - first_value) / first_value - # features = features_pct_change + features.dropna(inplace=True) + features.reset_index(drop=True, inplace=True) + features = features.T - features.dropna(inplace=True) - features.reset_index(drop=True, inplace=True) + # 补全每一行到指定长度 + features = features.apply(lambda row: fill_to_len(row, max_row_length, fill_rule), axis=1) - features = features.T + # 获取实际的列数 + actual_columns = features.shape[1] + features['label'] = labelName + features.columns = [f'feature{i+1}' for i in range(actual_columns)] + ['label'] - # 补全每一行到指定长度 - features = features.apply(lambda row: fill_to_len(row, max_row_length, fill_rule), axis=1) - - # 获取实际的列数 - actual_columns = features.shape[1] - - features['label'] = labelName - # 使用实际的列数来创建列名 - features.columns = [f'feature{i+1}' for i in range(actual_columns)] + ['label'] - - return features - -def fill_to_len(row, length = 1000, rule = None): + return features + + +def fill_to_len(row, length=1000, rule=None): + if len(row) >= length: + return row.iloc[:length].reset_index(drop=True) + + fill_value = 0 + if rule == 'min': + fill_value = row.min() + elif rule == 'mean': + fill_value = row.mean() + elif rule == 'zero': fill_value = 0 - if rule == 'min': - fill_value = row.min() - elif rule == 'mean': - fill_value = row.mean() - elif rule == 'zero': - fill_value = 0 - fill_values = pd.Series([fill_value] * (length - len(row))) + fill_values = pd.Series([fill_value] * (length - len(row))) + return pd.concat([row, fill_values], ignore_index=True) - return pd.concat([row, fill_values], ignore_index=True) def get_max_row_len(folder, filenames): max_len = 0 @@ -130,7 +127,6 @@ def get_max_row_len(folder, filenames): max_len = max(max_len, df.shape[0]) return max_len -__all__ = ['load_data'] # ---------- 内部工具函数:处理包含零宽字符或不同 Unicode 形式的文件名匹配 ---------- @@ -139,28 +135,28 @@ def _strip_zero_width(s: str) -> str: if not isinstance(s, str): return s return s.translate({ - 0x200B: None, # ZERO WIDTH SPACE - 0x200C: None, # ZERO WIDTH NON-JOINER - 0x200D: None, # ZERO WIDTH JOINER - 0xFEFF: None, # ZERO WIDTH NO-BREAK SPACE + 0x200B: None, + 0x200C: None, + 0x200D: None, + 0xFEFF: None, }) + def _canonicalize_name(name: str) -> str: # 规范化到 NFKC,并移除零宽字符 name = unicodedata.normalize('NFKC', name) name = _strip_zero_width(name) return name + def _normalize_for_compare(name: str) -> str: - # 进一步规范化用于宽松比较: - # - 统一大小写 - # - 将下划线视为空格(与文件名用下划线代替空格的情况匹配) - # - 折叠所有空白为一个空格,并去除首尾空格 + # 进一步规范化用于宽松比较 n = _canonicalize_name(name) n = n.replace('_', ' ') n = ' '.join(n.split()) return n.lower() + def _find_matching_file(folder: str, expected_name: str): # 首先进行严格匹配(规范化后相等) expected = _canonicalize_name(expected_name) @@ -179,10 +175,13 @@ def _find_matching_file(folder: str, expected_name: str): if _canonicalize_name(f).lower() == expected_lower: return f - # 宽松策略:将下划线当作空格处理,并折叠空白(用于匹配 "Crocodile grain" vs "Crocodile_grain") + # 宽松策略:将下划线当作空格处理,并折叠空白 expected_relaxed = _normalize_for_compare(expected_name) for f in entries: if _normalize_for_compare(f) == expected_relaxed: return f return None + + +__all__ = ['load_data'] diff --git a/Qfunctions/saveToXlsx.py b/Qfunctions/saveToXlsx.py new file mode 100644 index 0000000..52f2e96 --- /dev/null +++ b/Qfunctions/saveToXlsx.py @@ -0,0 +1,165 @@ +import os +import pandas as pd +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +import numpy as np + + +def save_to_xlsx(project_name, file_name, data): + folder_path = f'Result/{project_name}' + os.makedirs(folder_path, exist_ok=True) + data.to_excel(f'{folder_path}/{file_name}.xlsx', index=True) + print('Save successed to ' + f'{folder_path}/{file_name}.xlsx') + save_to_pic(project_name=project_name, file_name=file_name) + return + + +def save_to_pic(project_name, file_name): + os.makedirs(f'Result/{project_name}', exist_ok=True) + if file_name == 'pca_2d': + draw_pca_2d(f'Result/{project_name}/{file_name}.xlsx') + print('Save successed to ' + f'Result/{project_name}/{file_name}.png') + elif file_name == 'pca_3d': + draw_pca_3d(f'Result/{project_name}/{file_name}.xlsx') + print('Save successed to ' + f'Result/{project_name}/{file_name}.png') + elif file_name == 'acc_and_loss': + draw_epoch_data(f'Result/{project_name}/{file_name}.xlsx') + draw_last_epoch_bar_chart(f'Result/{project_name}/{file_name}.xlsx') + print('Save successed to line graph and bar graph') + elif file_name == 'cm': + draw_and_save_cm(f'Result/{project_name}/{file_name}.xlsx') + print('Save successed cm') + elif file_name == 'cmn': + draw_and_save_cm(f'Result/{project_name}/{file_name}.xlsx') + print('Save successed cmn') + else: + print('unknow picture type') + + +def draw_pca_2d(file_path): + df = pd.read_excel(file_path) + plt.figure(figsize=(8, 6)) + plt.scatter(df['PC1'], df['PC2'], c=df['labels'], cmap='viridis', edgecolor='k', alpha=0.6) + plt.xlabel('PC1') + plt.ylabel('PC2') + plt.title('2D PCA') + plt.colorbar(label='Labels') + plt.savefig(file_path.replace('.xlsx', '.png')) + plt.close() + + +def draw_pca_3d(file_path): + df = pd.read_excel(file_path) + fig = plt.figure(figsize=(8, 6)) + ax = fig.add_subplot(111, projection='3d') + scatter = ax.scatter(df['PC1'], df['PC2'], df['PC3'], c=df['labels'], cmap='viridis', edgecolor='k', alpha=0.6) + ax.set_xlabel('PC1') + ax.set_ylabel('PC2') + ax.set_zlabel('PC3') + ax.set_title('3D PCA') + fig.colorbar(scatter, ax=ax, label='Labels') + plt.savefig(file_path.replace('.xlsx', '.png')) + + +def draw_epoch_data(file_path): + df = pd.read_excel(file_path) + epochs = df['epoch'] + train_loss = df['train_loss'] + train_accuracy = df['train_accuracy'] * 100 + test_accuracy = df['test_accuracy'] * 100 + f1_score = df['f1_score'] + precision = df['precision'] + recall = df['recall'] + + fig, axs = plt.subplots(2, 3, figsize=(18, 12)) + + axs[0, 0].plot(epochs, train_loss, 'b-', label='Train Loss') + axs[0, 0].set_xlabel('Epoch') + axs[0, 0].set_ylabel('Loss') + axs[0, 0].set_title('Training Loss over Epochs') + axs[0, 0].legend() + + axs[0, 1].plot(epochs, train_accuracy, 'g-', label='Train Accuracy') + axs[0, 1].plot(epochs, test_accuracy, 'r-', label='Test Accuracy') + axs[0, 1].set_xlabel('Epoch') + axs[0, 1].set_ylabel('Accuracy (%)') + axs[0, 1].set_title('Train and Test Accuracy over Epochs') + axs[0, 1].legend() + + axs[0, 2].plot(epochs, f1_score, 'm-', label='F1 Score') + axs[0, 2].set_xlabel('Epoch') + axs[0, 2].set_ylabel('F1 Score') + axs[0, 2].set_title('F1 Score over Epochs') + axs[0, 2].legend() + + axs[1, 0].plot(epochs, precision, 'c-', label='Precision') + axs[1, 0].set_xlabel('Epoch') + axs[1, 0].set_ylabel('Precision') + axs[1, 0].set_title('Precision over Epochs') + axs[1, 0].legend() + + axs[1, 1].plot(epochs, recall, 'y-', label='Recall') + axs[1, 1].set_xlabel('Epoch') + axs[1, 1].set_ylabel('Recall') + axs[1, 1].set_title('Recall over Epochs') + axs[1, 1].legend() + + axs[1, 2].axis('off') + + plt.tight_layout() + plt.savefig(file_path.replace('.xlsx', '_epoch.png')) + plt.close() + + +def draw_last_epoch_bar_chart(file_path): + df = pd.read_excel(file_path) + last_epoch_data = df.iloc[-1] + + metrics = ['train_loss', 'train_accuracy', 'test_accuracy', 'f1_score', 'precision', 'recall'] + values = [last_epoch_data[metric] for metric in metrics] + labels = ['Train Loss', 'Train Accuracy', 'Test Accuracy', 'F1 Score', 'Precision', 'Recall'] + + values[1] *= 100 + values[2] *= 100 + + plt.figure(figsize=(10, 6)) + plt.bar(labels, values, color=['blue', 'green', 'red', 'magenta', 'cyan', 'yellow']) + plt.xlabel('Metrics') + plt.ylabel('Values') + plt.title('Last Epoch Metrics') + plt.ylim(bottom=0) + + for i, value in enumerate(values): + plt.text(i, value + 0.01, f'{value:.2f}', ha='center') + + plt.tight_layout() + plt.savefig(file_path.replace('.xlsx', '_last_epoch_bar.png')) + plt.close() + + +def draw_and_save_cm(file_path): + df_cm = pd.read_excel(file_path) + + labels = df_cm.columns[1:].tolist() + cm = df_cm.values[:, 1:] + + fig, axs = plt.subplots(1, 2, figsize=(12, 6)) + + axs[0].imshow(cm, interpolation='nearest', cmap='Blues') + axs[0].set_title('Confusion Matrix') + axs[0].set_xlabel('Predicted') + axs[0].set_ylabel('True') + axs[0].set_xticks(np.arange(len(labels))) + axs[0].set_yticks(np.arange(len(labels))) + axs[0].set_xticklabels(labels) + axs[0].set_yticklabels(labels) + + for i in range(len(labels)): + for j in range(len(labels)): + axs[0].text(j, i, f'{cm[i, j]}', ha='center', va='center') + + plt.tight_layout() + plt.savefig(file_path.replace('.xlsx', '.png')) + plt.close() diff --git a/Qfunctions/saveToxlsx.py b/Qfunctions/saveToxlsx.py deleted file mode 100644 index 717f431..0000000 --- a/Qfunctions/saveToxlsx.py +++ /dev/null @@ -1,173 +0,0 @@ -import os -import pandas as pd -import matplotlib.pyplot as plt -from mpl_toolkits.mplot3d import Axes3D -import numpy as np - -def save_to_xlsx(project_name, file_name, data): - folder_path = f'Result/{project_name}' - os.makedirs(folder_path, exist_ok=True) - data.to_excel(f'{folder_path}/{file_name}.xlsx', index=True) - print("Save successed to " + f'{folder_path}/{file_name}.xlsx') - save_to_pic(project_name=project_name, file_name=file_name) - return - -def save_to_pic(project_name, file_name): - os.makedirs(f'Result/{project_name}', exist_ok=True) - if file_name == 'pca_2d': - draw_pca_2d(f'Result/{project_name}/{file_name}.xlsx') - print("Save successed to " + f'Result/{project_name}/{file_name}.png') - elif file_name == 'pca_3d': - draw_pca_3d(f'Result/{project_name}/{file_name}.xlsx') - print("Save successed to " + f'Result/{project_name}/{file_name}.png') - elif file_name == 'acc_and_loss': - draw_epoch_data(f'Result/{project_name}/{file_name}.xlsx') - draw_last_epoch_bar_chart(f'Result/{project_name}/{file_name}.xlsx') - print("Save successed to line graph and bar graph") - elif file_name == 'cm': - draw_and_save_cm(f'Result/{project_name}/{file_name}.xlsx') - print("Save successed cm") - elif file_name == 'cmn': - draw_and_save_cm(f'Result/{project_name}/{file_name}.xlsx') - print("Save successed cmn") - else: - print("unknow picture type") - - -def draw_pca_2d(file_path): - df = pd.read_excel(file_path) - plt.figure(figsize=(8, 6)) - plt.scatter(df['PC1'], df['PC2'], c=df['labels'], cmap='viridis', edgecolor='k', alpha=0.6) - plt.xlabel('PC1') - plt.ylabel('PC2') - plt.title('2D PCA') - plt.colorbar(label='Labels') - plt.savefig(file_path.replace('.xlsx', '.png')) - plt.close() - -def draw_pca_3d(file_path): - df = pd.read_excel(file_path) - fig = plt.figure(figsize=(8, 6)) - ax = fig.add_subplot(111, projection='3d') - scatter = ax.scatter(df['PC1'], df['PC2'], df['PC3'], c=df['labels'], cmap='viridis', edgecolor='k', alpha=0.6) - ax.set_xlabel('PC1') - ax.set_ylabel('PC2') - ax.set_zlabel('PC3') - ax.set_title('3D PCA') - fig.colorbar(scatter, ax=ax, label='Labels') - plt.savefig(file_path.replace('.xlsx', '.png')) - -def draw_epoch_data(file_path): - df = pd.read_excel(file_path) - epochs = df['epoch'] - train_loss = df['train_loss'] - train_accuracy = df['train_accuracy'] * 100 - test_accuracy = df['test_accuracy'] * 100 - f1_score = df['f1_score'] - precision = df['precision'] - recall = df['recall'] - - fig, axs = plt.subplots(2, 3, figsize=(18, 12)) - - # 折线图:训练损失 - axs[0, 0].plot(epochs, train_loss, 'b-', label='Train Loss') - axs[0, 0].set_xlabel('Epoch') - axs[0, 0].set_ylabel('Loss') - axs[0, 0].set_title('Training Loss over Epochs') - axs[0, 0].legend() - - # 折线图:训练准确率和测试准确率 - axs[0, 1].plot(epochs, train_accuracy, 'g-', label='Train Accuracy') - axs[0, 1].plot(epochs, test_accuracy, 'r-', label='Test Accuracy') - axs[0, 1].set_xlabel('Epoch') - axs[0, 1].set_ylabel('Accuracy (%)') - axs[0, 1].set_title('Train and Test Accuracy over Epochs') - axs[0, 1].legend() - - # 折线图:F1 Score - axs[0, 2].plot(epochs, f1_score, 'm-', label='F1 Score') - axs[0, 2].set_xlabel('Epoch') - axs[0, 2].set_ylabel('F1 Score') - axs[0, 2].set_title('F1 Score over Epochs') - axs[0, 2].legend() - - # 折线图:Precision - axs[1, 0].plot(epochs, precision, 'c-', label='Precision') - axs[1, 0].set_xlabel('Epoch') - axs[1, 0].set_ylabel('Precision') - axs[1, 0].set_title('Precision over Epochs') - axs[1, 0].legend() - - # 折线图:Recall - axs[1, 1].plot(epochs, recall, 'y-', label='Recall') - axs[1, 1].set_xlabel('Epoch') - axs[1, 1].set_ylabel('Recall') - axs[1, 1].set_title('Recall over Epochs') - axs[1, 1].legend() - - # 空白或额外的图表空间(如果需要) - axs[1, 2].axis('off') - - plt.tight_layout() - plt.savefig(file_path.replace('.xlsx', '_epoch.png')) - plt.close() - -def draw_last_epoch_bar_chart(file_path): - df = pd.read_excel(file_path) - last_epoch_data = df.iloc[-1] - - metrics = ['train_loss', 'train_accuracy', 'test_accuracy', 'f1_score', 'precision', 'recall'] - values = [last_epoch_data[metric] for metric in metrics] - labels = ['Train Loss', 'Train Accuracy', 'Test Accuracy', 'F1 Score', 'Precision', 'Recall'] - - # 调整数值格式 - values[1] *= 100 # Train Accuracy - values[2] *= 100 # Test Accuracy - - plt.figure(figsize=(10, 6)) - plt.bar(labels, values, color=['blue', 'green', 'red', 'magenta', 'cyan', 'yellow']) - plt.xlabel('Metrics') - plt.ylabel('Values') - plt.title('Last Epoch Metrics') - plt.ylim(bottom=0) - - # 添加数值标签 - for i, value in enumerate(values): - plt.text(i, value + 0.01, f'{value:.2f}', ha='center') - - plt.tight_layout() - plt.savefig(file_path.replace('.xlsx', '_last_epoch_bar.png')) - plt.close() - -def draw_and_save_cm(file_path): - # 读取 Excel 文件 - df_cm = pd.read_excel(file_path) - - # 获取标签(假设 DataFrame 的列为类别标签) - labels = df_cm.columns[1:].tolist() - - # 获取混淆矩阵和归一化混淆矩阵的数值 - cm = df_cm.values[:, 1:] - - # 创建一个图像和子图 - fig, axs = plt.subplots(1, 2, figsize=(12, 6)) - - # 绘制普通混淆矩阵 - axs[0].imshow(cm, interpolation='nearest', cmap='Blues') - axs[0].set_title('Confusion Matrix') - axs[0].set_xlabel('Predicted') - axs[0].set_ylabel('True') - axs[0].set_xticks(np.arange(len(labels))) - axs[0].set_yticks(np.arange(len(labels))) - axs[0].set_xticklabels(labels) - axs[0].set_yticklabels(labels) - - # 添加数值标签 - for i in range(len(labels)): - for j in range(len(labels)): - axs[0].text(j, i, f'{cm[i, j]}', ha='center', va='center') - - # 调整布局并保存图像 - plt.tight_layout() - plt.savefig(file_path.replace('.xlsx', '.png')) - plt.close() diff --git a/Qtorch/Functions/__init__.py b/Qtorch/Functions/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/Qtorch/Functions/divSet.py b/Qtorch/Functions/divSet.py deleted file mode 100644 index 8e09c2e..0000000 --- a/Qtorch/Functions/divSet.py +++ /dev/null @@ -1,28 +0,0 @@ -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import StandardScaler, LabelEncoder - -def divSet(data, labels = None, test_size=0.2, random_state=None): - - encoder = LabelEncoder() - - # 最后一列是标签 - X = data.iloc[:, :-1] - y = data.iloc[:, -1] - - if labels: - labels = encoder.fit_transform(labels) - else: - encoder.fit(y) - - # 分割数据集为训练集和测试集 - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state) - # 标准化特征 - scaler = StandardScaler() - X_train = scaler.fit_transform(X_train) - X_test = scaler.transform(X_test) - - # 编码标签 - y_train = encoder.transform(y_train.values.reshape(-1, 1)) - y_test = encoder.transform(y_test.values.reshape(-1, 1)) - - return X_train, X_test, y_train, y_test, encoder diff --git a/Qtorch/Models/Qmlp.py b/Qtorch/Models/Qmlp.py index 4317e0b..1e61934 100644 --- a/Qtorch/Models/Qmlp.py +++ b/Qtorch/Models/Qmlp.py @@ -15,7 +15,7 @@ class Qmlp(Qnn): super(Qmlp, self).__init__(data=data, labels=labels, test_size=test_size, random_state=random_state) input_size = self.X_train.shape[1] - num_classes = len(np.unique(self.y_train)) + num_classes = len(labels) if labels is not None else int(np.max(self.y_train)) + 1 self.layers = nn.ModuleList() # 连接输入层和第一个隐藏层 diff --git a/Qtorch/Models/Qnn.py b/Qtorch/Models/Qnn.py index 6f9b897..6871fe6 100644 --- a/Qtorch/Models/Qnn.py +++ b/Qtorch/Models/Qnn.py @@ -1,12 +1,12 @@ import torch import torch.nn as nn +import numpy as np import pandas as pd from sklearn.decomposition import PCA from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score from torch.utils.data import DataLoader, TensorDataset -from Qtorch import divSet as DS -# from Qfunctions.saveToxlsx import save_to_xlsx as stx +from Qfunctions.divSet import divSet as DS class Qnn(nn.Module): @@ -45,11 +45,9 @@ class Qnn(nn.Module): # 将data转换为tensor形式 X_train_tensor = torch.tensor(self.X_train, dtype=torch.float32) - self.y_train = self.LABEL_ENCODER.fit_transform(self.y_train) y_train_tensor = torch.tensor(self.y_train, dtype=torch.long) X_test_tensor = torch.tensor(self.X_test, dtype=torch.float32) - self.y_test = self.LABEL_ENCODER.transform(self.y_test) y_test_tensor = torch.tensor(self.y_test, dtype=torch.long) train_dataset = TensorDataset(X_train_tensor, y_train_tensor) @@ -116,9 +114,9 @@ class Qnn(nn.Module): all_prob.extend(prob.cpu().numpy()) test_accuracy = correct_test / total_test - f1 = f1_score(all_labels, all_predicted, average='macro') - precision = precision_score(all_labels, all_predicted, average='macro') - recall = recall_score(all_labels, all_predicted, average='macro') + f1 = f1_score(all_labels, all_predicted, average='macro', zero_division=0) + precision = precision_score(all_labels, all_predicted, average='macro', zero_division=0) + recall = recall_score(all_labels, all_predicted, average='macro', zero_division=0) if (epoch + 1) % 10 == 0: print('===============================================') @@ -148,8 +146,10 @@ class Qnn(nn.Module): break # cmn为归一化矩阵 - self.cm = confusion_matrix(all_labels, all_predicted) - self.cmn = confusion_matrix(all_labels, all_predicted, normalize='true') + # Keep matrix dimensions stable even when some classes do not appear in this split. + cm_labels = np.arange(len(self.labels)) if self.labels is not None else None + self.cm = confusion_matrix(all_labels, all_predicted, labels=cm_labels) + self.cmn = confusion_matrix(all_labels, all_predicted, labels=cm_labels, normalize='true') print(self.cm) return diff --git a/Qtorch/__init__.py b/Qtorch/__init__.py index f75a808..67c9590 100644 --- a/Qtorch/__init__.py +++ b/Qtorch/__init__.py @@ -1,3 +1,2 @@ # Qtorch/__init__.py -from .Functions.divSet import divSet from .Models import Qnn, Qmlp, Qcnn \ No newline at end of file diff --git a/README.md b/README.md index e665d64..2cbd55a 100644 --- a/README.md +++ b/README.md @@ -143,8 +143,8 @@ Wood <-> Wood.xlsx 或 Wood/ ```python from Qtorch.Models.Qmlp import Qmlp from Qfunctions.divSet import divSet -from Qfunctions.loaData import load_data -from Qfunctions.saveToxlsx import save_to_xlsx +from Qfunctions.loadData import load_data +from Qfunctions.saveToXlsx import save_to_xlsx projet_name = '20241009MaterialDiv' label_names = ['Acrlic', 'Ecoflex', 'PDMS', 'PLA', 'Wood'] diff --git a/main.py b/main.py index 03943ce..5df075e 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,6 @@ from Qtorch.Models.Qmlp import Qmlp -from Qfunctions.divSet import divSet -from Qfunctions.loaData import load_data -from Qfunctions.saveToxlsx import save_to_xlsx as save_to_xlsx +from Qfunctions.loadData import load_data +from Qfunctions.saveToXlsx import save_to_xlsx as save_to_xlsx def main(): # 输入元数据文件夹名称 @@ -11,13 +10,12 @@ def main(): label_names = list(range(10)) print(label_names) data = load_data(projet_name, label_names, isDir=False, fileClass='xlsx') - X_train, X_test, y_train, y_test, encoder = divSet( - data=data, labels=label_names, test_size= 0.3 - ) model = Qmlp( - X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test, + data=data, + labels=label_names, hidden_layers = [128, 256, 128], + test_size=0.3, dropout_rate=0 ) # model = QCNN(