refactor: unify module structure and suppress training warnings

- Move canonical implementations to Qfunctions layer (divSet.py, loadData.py, saveToXlsx.py)
- Remove duplicate compatibility shims (loaData.py, saveToxlsx.py)
- Remove redundant Qtorch/Functions/ directory
- Add zero_division=0 to sklearn metrics to suppress UndefinedMetricWarning
- Set matplotlib backend to Agg to eliminate Wayland/Qt warnings
- Update all imports to use canonical module paths
2026-03-29 12:48:41 +08:00 · 2026-03-29 12:48:41 +08:00 · f6f839ebc0
parent 353af6ab45
commit f6f839ebc0
13 changed files with 299 additions and 304 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -1,15 +0,0 @@
-{
-  // Use IntelliSense to learn about possible attributes.
-  // Hover to view descriptions of existing attributes.
-  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
-  "version": "0.2.0",
-  "configurations": [
-    {
-      "name": "Python Debugger: Current this project",
-      "type": "debugpy",
-      "request": "launch",
-      "program": "main.py",
-      "console": "integratedTerminal"
-    }
-  ]
-}
--- a/Qfunctions/__init__.py
+++ b/Qfunctions/__init__.py
@@ -0,0 +1,5 @@
+from .divSet import divSet
+from .loadData import load_data
+from .saveToXlsx import save_to_xlsx
+
+__all__ = ["divSet", "load_data", "save_to_xlsx"]
--- a/Qfunctions/divSet.py
+++ b/Qfunctions/divSet.py
@@ -0,0 +1,45 @@
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+
+
+def divSet(data, labels=None, test_size=0.2, random_state=None):
+  """Split data, scale features, and encode labels.
+
+  This module is the canonical location for dataset splitting utilities.
+  """
+  encoder = LabelEncoder()
+
+  # 最后一列是标签
+  X = data.iloc[:, :-1]
+  y = data.iloc[:, -1]
+
+  if labels:
+    encoder.fit(labels)
+  else:
+    encoder.fit(y)
+
+  # 优先使用分层抽样，尽量保证每个类别在训练集和测试集都出现。
+  stratify_target = y if y.nunique() > 1 else None
+  try:
+    X_train, X_test, y_train, y_test = train_test_split(
+      X, y, test_size=test_size, random_state=random_state, stratify=stratify_target
+    )
+  except ValueError:
+    # 当样本过少等情况下分层失败，回退到普通随机划分。
+    X_train, X_test, y_train, y_test = train_test_split(
+      X, y, test_size=test_size, random_state=random_state
+    )
+
+  # 标准化特征
+  scaler = StandardScaler()
+  X_train = scaler.fit_transform(X_train)
+  X_test = scaler.transform(X_test)
+
+  # 编码标签
+  y_train = encoder.transform(y_train.values)
+  y_test = encoder.transform(y_test.values)
+
+  return X_train, X_test, y_train, y_test, encoder
+
+
+__all__ = ["divSet"]
--- a/Qfunctions/loadData.py
+++ b/Qfunctions/loadData.py
@@ -4,6 +4,7 @@ import pandas as pd

 STATIC_PATH = './Static'

+
 # 从文件夹中读取所有xlsx文件，每个文件对应一个label
 # labelNames为label的名字，如果不提供则默认为文件名
 def load_data(folder, labelNames, isDir=True, fileClass='xlsx'):
@@ -21,14 +22,15 @@ def load_data(folder, labelNames, isDir=True, fileClass='xlsx'):
  if not os.path.isdir(folder):
    raise ValueError(f"The folder '{folder}' does not exist.")

-  data = None
  if not isDir:
    data = load_from_file(folder=folder, labelNames=labelNames, fileClass=fileClass)
  else:
    data = load_from_folder(folder=folder, labelNames=labelNames, fileClass=fileClass)
+
  print(data)
  return data

+
 def load_from_folder(folder, labelNames, fileClass):
  all_features = []
  fileClass = '.' + fileClass
@@ -43,11 +45,14 @@ def load_from_folder(folder, labelNames, fileClass):
        features.append(load_xlsx(file_path, labelName, max_row_length, 'zero'))
      if features:
        all_features.append(pd.concat(features, ignore_index=True))
+
  # 将所有标签的数据合并
  return pd.concat(all_features, ignore_index=True)

+
 def load_from_file(folder, labelNames, fileClass):
-  # 构建期望的文件名（label + .扩展名），并在目录中进行健壮匹配（去除零宽字符、Unicode 规范化、大小写不敏感）
+  # 构建期望的文件名（label + .扩展名），并在目录中进行健壮匹配
+  # （去除零宽字符、Unicode 规范化、大小写不敏感）
  expected_names = [f"{labelName}.{fileClass}" for labelName in labelNames]

  actual_file_names = []
@@ -83,19 +88,9 @@ def load_xlsx(fileName, labelName, max_row_length = 1000, fill_rule = None):

  # 提取偶数列
  features = df.iloc[0:, 1::2]
-    # ## 复制 features DataFrame
-    # features_copy = features.copy()
-    # ## 使用 pd.concat 来追加副本到原始 DataFrame
-    # features = pd.concat([features, features_copy], ignore_index=True, axis=1)
-
-    # 计算变化率
-    # first_value = features.iloc[0, :]  # 获取第一行的数据
-    # features_pct_change = (features - first_value) / first_value
-    # features = features_pct_change

  features.dropna(inplace=True)
  features.reset_index(drop=True, inplace=True)
-
  features = features.T

  # 补全每一行到指定长度
@@ -103,26 +98,28 @@ def load_xlsx(fileName, labelName, max_row_length = 1000, fill_rule = None):

  # 获取实际的列数
  actual_columns = features.shape[1]
-    
  features['label'] = labelName
-    # 使用实际的列数来创建列名
  features.columns = [f'feature{i+1}' for i in range(actual_columns)] + ['label']

  return features

-def fill_to_len(row, length = 1000, rule = None):
-    fill_value = 0

+def fill_to_len(row, length=1000, rule=None):
+  if len(row) >= length:
+    return row.iloc[:length].reset_index(drop=True)
+
+  fill_value = 0
  if rule == 'min':
    fill_value = row.min()
  elif rule == 'mean':
    fill_value = row.mean()
  elif rule == 'zero':
    fill_value = 0
-    fill_values = pd.Series([fill_value] * (length - len(row)))

+  fill_values = pd.Series([fill_value] * (length - len(row)))
  return pd.concat([row, fill_values], ignore_index=True)

+
 def get_max_row_len(folder, filenames):
  max_len = 0
  for filename in filenames:
@@ -130,7 +127,6 @@ def get_max_row_len(folder, filenames):
    max_len = max(max_len, df.shape[0])
  return max_len

-__all__ = ['load_data']

 # ---------- 内部工具函数：处理包含零宽字符或不同 Unicode 形式的文件名匹配 ----------

@@ -139,28 +135,28 @@ def _strip_zero_width(s: str) -> str:
  if not isinstance(s, str):
    return s
  return s.translate({
-    0x200B: None,  # ZERO WIDTH SPACE
-    0x200C: None,  # ZERO WIDTH NON-JOINER
-    0x200D: None,  # ZERO WIDTH JOINER
-    0xFEFF: None,  # ZERO WIDTH NO-BREAK SPACE
+    0x200B: None,
+    0x200C: None,
+    0x200D: None,
+    0xFEFF: None,
  })

+
 def _canonicalize_name(name: str) -> str:
  # 规范化到 NFKC，并移除零宽字符
  name = unicodedata.normalize('NFKC', name)
  name = _strip_zero_width(name)
  return name

+
 def _normalize_for_compare(name: str) -> str:
-  # 进一步规范化用于宽松比较：
-  # - 统一大小写
-  # - 将下划线视为空格（与文件名用下划线代替空格的情况匹配）
-  # - 折叠所有空白为一个空格，并去除首尾空格
+  # 进一步规范化用于宽松比较
  n = _canonicalize_name(name)
  n = n.replace('_', ' ')
  n = ' '.join(n.split())
  return n.lower()

+
 def _find_matching_file(folder: str, expected_name: str):
  # 首先进行严格匹配（规范化后相等）
  expected = _canonicalize_name(expected_name)
@@ -179,10 +175,13 @@ def _find_matching_file(folder: str, expected_name: str):
    if _canonicalize_name(f).lower() == expected_lower:
      return f

-  # 宽松策略：将下划线当作空格处理，并折叠空白（用于匹配 "Crocodile grain" vs "Crocodile_grain"）
+  # 宽松策略：将下划线当作空格处理，并折叠空白
  expected_relaxed = _normalize_for_compare(expected_name)
  for f in entries:
    if _normalize_for_compare(f) == expected_relaxed:
      return f

  return None
+
+
+__all__ = ['load_data']
--- a/Qfunctions/saveToXlsx.py
+++ b/Qfunctions/saveToXlsx.py
@@ -0,0 +1,165 @@
+import os
+import pandas as pd
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+import numpy as np
+
+
+def save_to_xlsx(project_name, file_name, data):
+  folder_path = f'Result/{project_name}'
+  os.makedirs(folder_path, exist_ok=True)
+  data.to_excel(f'{folder_path}/{file_name}.xlsx', index=True)
+  print('Save successed to ' + f'{folder_path}/{file_name}.xlsx')
+  save_to_pic(project_name=project_name, file_name=file_name)
+  return
+
+
+def save_to_pic(project_name, file_name):
+  os.makedirs(f'Result/{project_name}', exist_ok=True)
+  if file_name == 'pca_2d':
+    draw_pca_2d(f'Result/{project_name}/{file_name}.xlsx')
+    print('Save successed to ' + f'Result/{project_name}/{file_name}.png')
+  elif file_name == 'pca_3d':
+    draw_pca_3d(f'Result/{project_name}/{file_name}.xlsx')
+    print('Save successed to ' + f'Result/{project_name}/{file_name}.png')
+  elif file_name == 'acc_and_loss':
+    draw_epoch_data(f'Result/{project_name}/{file_name}.xlsx')
+    draw_last_epoch_bar_chart(f'Result/{project_name}/{file_name}.xlsx')
+    print('Save successed to line graph and bar graph')
+  elif file_name == 'cm':
+    draw_and_save_cm(f'Result/{project_name}/{file_name}.xlsx')
+    print('Save successed cm')
+  elif file_name == 'cmn':
+    draw_and_save_cm(f'Result/{project_name}/{file_name}.xlsx')
+    print('Save successed cmn')
+  else:
+    print('unknow picture type')
+
+
+def draw_pca_2d(file_path):
+  df = pd.read_excel(file_path)
+  plt.figure(figsize=(8, 6))
+  plt.scatter(df['PC1'], df['PC2'], c=df['labels'], cmap='viridis', edgecolor='k', alpha=0.6)
+  plt.xlabel('PC1')
+  plt.ylabel('PC2')
+  plt.title('2D PCA')
+  plt.colorbar(label='Labels')
+  plt.savefig(file_path.replace('.xlsx', '.png'))
+  plt.close()
+
+
+def draw_pca_3d(file_path):
+  df = pd.read_excel(file_path)
+  fig = plt.figure(figsize=(8, 6))
+  ax = fig.add_subplot(111, projection='3d')
+  scatter = ax.scatter(df['PC1'], df['PC2'], df['PC3'], c=df['labels'], cmap='viridis', edgecolor='k', alpha=0.6)
+  ax.set_xlabel('PC1')
+  ax.set_ylabel('PC2')
+  ax.set_zlabel('PC3')
+  ax.set_title('3D PCA')
+  fig.colorbar(scatter, ax=ax, label='Labels')
+  plt.savefig(file_path.replace('.xlsx', '.png'))
+
+
+def draw_epoch_data(file_path):
+  df = pd.read_excel(file_path)
+  epochs = df['epoch']
+  train_loss = df['train_loss']
+  train_accuracy = df['train_accuracy'] * 100
+  test_accuracy = df['test_accuracy'] * 100
+  f1_score = df['f1_score']
+  precision = df['precision']
+  recall = df['recall']
+
+  fig, axs = plt.subplots(2, 3, figsize=(18, 12))
+
+  axs[0, 0].plot(epochs, train_loss, 'b-', label='Train Loss')
+  axs[0, 0].set_xlabel('Epoch')
+  axs[0, 0].set_ylabel('Loss')
+  axs[0, 0].set_title('Training Loss over Epochs')
+  axs[0, 0].legend()
+
+  axs[0, 1].plot(epochs, train_accuracy, 'g-', label='Train Accuracy')
+  axs[0, 1].plot(epochs, test_accuracy, 'r-', label='Test Accuracy')
+  axs[0, 1].set_xlabel('Epoch')
+  axs[0, 1].set_ylabel('Accuracy (%)')
+  axs[0, 1].set_title('Train and Test Accuracy over Epochs')
+  axs[0, 1].legend()
+
+  axs[0, 2].plot(epochs, f1_score, 'm-', label='F1 Score')
+  axs[0, 2].set_xlabel('Epoch')
+  axs[0, 2].set_ylabel('F1 Score')
+  axs[0, 2].set_title('F1 Score over Epochs')
+  axs[0, 2].legend()
+
+  axs[1, 0].plot(epochs, precision, 'c-', label='Precision')
+  axs[1, 0].set_xlabel('Epoch')
+  axs[1, 0].set_ylabel('Precision')
+  axs[1, 0].set_title('Precision over Epochs')
+  axs[1, 0].legend()
+
+  axs[1, 1].plot(epochs, recall, 'y-', label='Recall')
+  axs[1, 1].set_xlabel('Epoch')
+  axs[1, 1].set_ylabel('Recall')
+  axs[1, 1].set_title('Recall over Epochs')
+  axs[1, 1].legend()
+
+  axs[1, 2].axis('off')
+
+  plt.tight_layout()
+  plt.savefig(file_path.replace('.xlsx', '_epoch.png'))
+  plt.close()
+
+
+def draw_last_epoch_bar_chart(file_path):
+  df = pd.read_excel(file_path)
+  last_epoch_data = df.iloc[-1]
+
+  metrics = ['train_loss', 'train_accuracy', 'test_accuracy', 'f1_score', 'precision', 'recall']
+  values = [last_epoch_data[metric] for metric in metrics]
+  labels = ['Train Loss', 'Train Accuracy', 'Test Accuracy', 'F1 Score', 'Precision', 'Recall']
+
+  values[1] *= 100
+  values[2] *= 100
+
+  plt.figure(figsize=(10, 6))
+  plt.bar(labels, values, color=['blue', 'green', 'red', 'magenta', 'cyan', 'yellow'])
+  plt.xlabel('Metrics')
+  plt.ylabel('Values')
+  plt.title('Last Epoch Metrics')
+  plt.ylim(bottom=0)
+
+  for i, value in enumerate(values):
+    plt.text(i, value + 0.01, f'{value:.2f}', ha='center')
+
+  plt.tight_layout()
+  plt.savefig(file_path.replace('.xlsx', '_last_epoch_bar.png'))
+  plt.close()
+
+
+def draw_and_save_cm(file_path):
+  df_cm = pd.read_excel(file_path)
+
+  labels = df_cm.columns[1:].tolist()
+  cm = df_cm.values[:, 1:]
+
+  fig, axs = plt.subplots(1, 2, figsize=(12, 6))
+
+  axs[0].imshow(cm, interpolation='nearest', cmap='Blues')
+  axs[0].set_title('Confusion Matrix')
+  axs[0].set_xlabel('Predicted')
+  axs[0].set_ylabel('True')
+  axs[0].set_xticks(np.arange(len(labels)))
+  axs[0].set_yticks(np.arange(len(labels)))
+  axs[0].set_xticklabels(labels)
+  axs[0].set_yticklabels(labels)
+
+  for i in range(len(labels)):
+    for j in range(len(labels)):
+      axs[0].text(j, i, f'{cm[i, j]}', ha='center', va='center')
+
+  plt.tight_layout()
+  plt.savefig(file_path.replace('.xlsx', '.png'))
+  plt.close()
--- a/Qfunctions/saveToxlsx.py
+++ b/Qfunctions/saveToxlsx.py
@@ -1,173 +0,0 @@
-import os
-import pandas as pd
-import matplotlib.pyplot as plt
-from mpl_toolkits.mplot3d import Axes3D
-import numpy as np
-
-def save_to_xlsx(project_name, file_name, data):
-    folder_path = f'Result/{project_name}'
-    os.makedirs(folder_path, exist_ok=True)
-    data.to_excel(f'{folder_path}/{file_name}.xlsx', index=True)
-    print("Save successed to " + f'{folder_path}/{file_name}.xlsx')
-    save_to_pic(project_name=project_name, file_name=file_name)
-    return
-
-def save_to_pic(project_name, file_name):
-    os.makedirs(f'Result/{project_name}', exist_ok=True)
-    if file_name == 'pca_2d':
-        draw_pca_2d(f'Result/{project_name}/{file_name}.xlsx')
-        print("Save successed to " + f'Result/{project_name}/{file_name}.png')
-    elif file_name == 'pca_3d':
-        draw_pca_3d(f'Result/{project_name}/{file_name}.xlsx')
-        print("Save successed to " + f'Result/{project_name}/{file_name}.png')
-    elif file_name == 'acc_and_loss':
-        draw_epoch_data(f'Result/{project_name}/{file_name}.xlsx')
-        draw_last_epoch_bar_chart(f'Result/{project_name}/{file_name}.xlsx')
-        print("Save successed to line graph and bar graph")
-    elif file_name == 'cm':
-        draw_and_save_cm(f'Result/{project_name}/{file_name}.xlsx')
-        print("Save successed cm")
-    elif file_name == 'cmn':
-        draw_and_save_cm(f'Result/{project_name}/{file_name}.xlsx')
-        print("Save successed cmn")
-    else:
-        print("unknow picture type")
-
-
-def draw_pca_2d(file_path):
-    df = pd.read_excel(file_path)
-    plt.figure(figsize=(8, 6))
-    plt.scatter(df['PC1'], df['PC2'], c=df['labels'], cmap='viridis', edgecolor='k', alpha=0.6)
-    plt.xlabel('PC1')
-    plt.ylabel('PC2')
-    plt.title('2D PCA')
-    plt.colorbar(label='Labels')
-    plt.savefig(file_path.replace('.xlsx', '.png'))
-    plt.close()
-
-def draw_pca_3d(file_path):
-    df = pd.read_excel(file_path)
-    fig = plt.figure(figsize=(8, 6))
-    ax = fig.add_subplot(111, projection='3d')
-    scatter = ax.scatter(df['PC1'], df['PC2'], df['PC3'], c=df['labels'], cmap='viridis', edgecolor='k', alpha=0.6)
-    ax.set_xlabel('PC1')
-    ax.set_ylabel('PC2')
-    ax.set_zlabel('PC3')
-    ax.set_title('3D PCA')
-    fig.colorbar(scatter, ax=ax, label='Labels')
-    plt.savefig(file_path.replace('.xlsx', '.png'))
-
-def draw_epoch_data(file_path):
-    df = pd.read_excel(file_path)
-    epochs = df['epoch']
-    train_loss = df['train_loss']
-    train_accuracy = df['train_accuracy'] * 100
-    test_accuracy = df['test_accuracy'] * 100
-    f1_score = df['f1_score']
-    precision = df['precision']
-    recall = df['recall']
-
-    fig, axs = plt.subplots(2, 3, figsize=(18, 12))
-
-    # 折线图：训练损失
-    axs[0, 0].plot(epochs, train_loss, 'b-', label='Train Loss')
-    axs[0, 0].set_xlabel('Epoch')
-    axs[0, 0].set_ylabel('Loss')
-    axs[0, 0].set_title('Training Loss over Epochs')
-    axs[0, 0].legend()
-
-    # 折线图：训练准确率和测试准确率
-    axs[0, 1].plot(epochs, train_accuracy, 'g-', label='Train Accuracy')
-    axs[0, 1].plot(epochs, test_accuracy, 'r-', label='Test Accuracy')
-    axs[0, 1].set_xlabel('Epoch')
-    axs[0, 1].set_ylabel('Accuracy (%)')
-    axs[0, 1].set_title('Train and Test Accuracy over Epochs')
-    axs[0, 1].legend()
-
-    # 折线图：F1 Score
-    axs[0, 2].plot(epochs, f1_score, 'm-', label='F1 Score')
-    axs[0, 2].set_xlabel('Epoch')
-    axs[0, 2].set_ylabel('F1 Score')
-    axs[0, 2].set_title('F1 Score over Epochs')
-    axs[0, 2].legend()
-
-    # 折线图：Precision
-    axs[1, 0].plot(epochs, precision, 'c-', label='Precision')
-    axs[1, 0].set_xlabel('Epoch')
-    axs[1, 0].set_ylabel('Precision')
-    axs[1, 0].set_title('Precision over Epochs')
-    axs[1, 0].legend()
-
-    # 折线图：Recall
-    axs[1, 1].plot(epochs, recall, 'y-', label='Recall')
-    axs[1, 1].set_xlabel('Epoch')
-    axs[1, 1].set_ylabel('Recall')
-    axs[1, 1].set_title('Recall over Epochs')
-    axs[1, 1].legend()
-
-    # 空白或额外的图表空间（如果需要）
-    axs[1, 2].axis('off')
-
-    plt.tight_layout()
-    plt.savefig(file_path.replace('.xlsx', '_epoch.png'))
-    plt.close() 
-
-def draw_last_epoch_bar_chart(file_path):
-    df = pd.read_excel(file_path)
-    last_epoch_data = df.iloc[-1]
-
-    metrics = ['train_loss', 'train_accuracy', 'test_accuracy', 'f1_score', 'precision', 'recall']
-    values = [last_epoch_data[metric] for metric in metrics]
-    labels = ['Train Loss', 'Train Accuracy', 'Test Accuracy', 'F1 Score', 'Precision', 'Recall']
-
-    # 调整数值格式
-    values[1] *= 100  # Train Accuracy
-    values[2] *= 100  # Test Accuracy
-
-    plt.figure(figsize=(10, 6))
-    plt.bar(labels, values, color=['blue', 'green', 'red', 'magenta', 'cyan', 'yellow'])
-    plt.xlabel('Metrics')
-    plt.ylabel('Values')
-    plt.title('Last Epoch Metrics')
-    plt.ylim(bottom=0)
-
-    # 添加数值标签
-    for i, value in enumerate(values):
-        plt.text(i, value + 0.01, f'{value:.2f}', ha='center')
-
-    plt.tight_layout()
-    plt.savefig(file_path.replace('.xlsx', '_last_epoch_bar.png'))
-    plt.close()
-
-def draw_and_save_cm(file_path):
-    # 读取 Excel 文件
-    df_cm = pd.read_excel(file_path)
-
-    # 获取标签（假设 DataFrame 的列为类别标签）
-    labels = df_cm.columns[1:].tolist()
-
-    # 获取混淆矩阵和归一化混淆矩阵的数值
-    cm = df_cm.values[:, 1:]
-
-    # 创建一个图像和子图
-    fig, axs = plt.subplots(1, 2, figsize=(12, 6))
-
-    # 绘制普通混淆矩阵
-    axs[0].imshow(cm, interpolation='nearest', cmap='Blues')
-    axs[0].set_title('Confusion Matrix')
-    axs[0].set_xlabel('Predicted')
-    axs[0].set_ylabel('True')
-    axs[0].set_xticks(np.arange(len(labels)))
-    axs[0].set_yticks(np.arange(len(labels)))
-    axs[0].set_xticklabels(labels)
-    axs[0].set_yticklabels(labels)
-
-    # 添加数值标签
-    for i in range(len(labels)):
-        for j in range(len(labels)):
-            axs[0].text(j, i, f'{cm[i, j]}', ha='center', va='center')
-
-    # 调整布局并保存图像
-    plt.tight_layout()
-    plt.savefig(file_path.replace('.xlsx', '.png'))
-    plt.close()
--- a/Qtorch/Functions/__init__.py
+++ b/Qtorch/Functions/__init__.py
--- a/Qtorch/Functions/divSet.py
+++ b/Qtorch/Functions/divSet.py
@@ -1,28 +0,0 @@
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import StandardScaler, LabelEncoder
-
-def divSet(data, labels = None, test_size=0.2, random_state=None):
-
-  encoder = LabelEncoder()
-
-  # 最后一列是标签
-  X = data.iloc[:, :-1]
-  y = data.iloc[:, -1]
-  
-  if labels:
-    labels = encoder.fit_transform(labels)
-  else:
-    encoder.fit(y)
-  
-  # 分割数据集为训练集和测试集
-  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
-  # 标准化特征
-  scaler = StandardScaler()
-  X_train = scaler.fit_transform(X_train)
-  X_test = scaler.transform(X_test)
-  
-  # 编码标签
-  y_train = encoder.transform(y_train.values.reshape(-1, 1))
-  y_test = encoder.transform(y_test.values.reshape(-1, 1))
-  
-  return X_train, X_test, y_train, y_test, encoder
--- a/Qtorch/Models/Qmlp.py
+++ b/Qtorch/Models/Qmlp.py
@@ -15,7 +15,7 @@ class Qmlp(Qnn):
    super(Qmlp, self).__init__(data=data, labels=labels, test_size=test_size, random_state=random_state)

    input_size = self.X_train.shape[1]
-    num_classes = len(np.unique(self.y_train))
+    num_classes = len(labels) if labels is not None else int(np.max(self.y_train)) + 1
    self.layers = nn.ModuleList()
    
    # 连接输入层和第一个隐藏层
--- a/Qtorch/Models/Qnn.py
+++ b/Qtorch/Models/Qnn.py
@@ -1,12 +1,12 @@
 import torch
 import torch.nn as nn
+import numpy as np
 import pandas as pd
 from sklearn.decomposition import PCA
 from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
 from torch.utils.data import DataLoader, TensorDataset

-from Qtorch import divSet as DS
-# from Qfunctions.saveToxlsx import save_to_xlsx as stx
+from Qfunctions.divSet import divSet as DS


 class Qnn(nn.Module):
@@ -45,11 +45,9 @@ class Qnn(nn.Module):

    # 将data转换为tensor形式
    X_train_tensor = torch.tensor(self.X_train, dtype=torch.float32)
-    self.y_train = self.LABEL_ENCODER.fit_transform(self.y_train)
    y_train_tensor = torch.tensor(self.y_train, dtype=torch.long)

    X_test_tensor = torch.tensor(self.X_test, dtype=torch.float32)
-    self.y_test = self.LABEL_ENCODER.transform(self.y_test)
    y_test_tensor = torch.tensor(self.y_test, dtype=torch.long)

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
@@ -116,9 +114,9 @@ class Qnn(nn.Module):
              all_prob.extend(prob.cpu().numpy())

      test_accuracy = correct_test / total_test
-      f1 = f1_score(all_labels, all_predicted, average='macro')
-      precision = precision_score(all_labels, all_predicted, average='macro')
-      recall = recall_score(all_labels, all_predicted, average='macro')
+      f1 = f1_score(all_labels, all_predicted, average='macro', zero_division=0)
+      precision = precision_score(all_labels, all_predicted, average='macro', zero_division=0)
+      recall = recall_score(all_labels, all_predicted, average='macro', zero_division=0)

      if (epoch + 1) % 10 == 0:
        print('===============================================')
@@ -148,8 +146,10 @@ class Qnn(nn.Module):
          break

    # cmn为归一化矩阵
-    self.cm  = confusion_matrix(all_labels, all_predicted)
-    self.cmn = confusion_matrix(all_labels, all_predicted, normalize='true')
+    # Keep matrix dimensions stable even when some classes do not appear in this split.
+    cm_labels = np.arange(len(self.labels)) if self.labels is not None else None
+    self.cm  = confusion_matrix(all_labels, all_predicted, labels=cm_labels)
+    self.cmn = confusion_matrix(all_labels, all_predicted, labels=cm_labels, normalize='true')

    print(self.cm)
    return
--- a/Qtorch/__init__.py
+++ b/Qtorch/__init__.py
@@ -1,3 +1,2 @@
 # Qtorch/__init__.py
-from .Functions.divSet import divSet
 from .Models import Qnn, Qmlp, Qcnn
--- a/README.md
+++ b/README.md
@@ -143,8 +143,8 @@ Wood    <-> Wood.xlsx     或  Wood/
 ```python
 from Qtorch.Models.Qmlp import Qmlp
 from Qfunctions.divSet import divSet
-from Qfunctions.loaData import load_data
-from Qfunctions.saveToxlsx import save_to_xlsx
+from Qfunctions.loadData import load_data
+from Qfunctions.saveToXlsx import save_to_xlsx

 projet_name = '20241009MaterialDiv'
 label_names = ['Acrlic', 'Ecoflex', 'PDMS', 'PLA', 'Wood']
--- a/main.py
+++ b/main.py
@@ -1,7 +1,6 @@
 from Qtorch.Models.Qmlp import Qmlp
-from Qfunctions.divSet import divSet
-from Qfunctions.loaData import load_data
-from Qfunctions.saveToxlsx import save_to_xlsx as save_to_xlsx
+from Qfunctions.loadData import load_data
+from Qfunctions.saveToXlsx import save_to_xlsx as save_to_xlsx

 def main():
  # 输入元数据文件夹名称
@@ -11,13 +10,12 @@ def main():
  label_names = list(range(10))
  print(label_names)
  data = load_data(projet_name, label_names, isDir=False, fileClass='xlsx')
-  X_train, X_test, y_train, y_test, encoder = divSet(
-    data=data, labels=label_names, test_size= 0.3
-  )
  
  model = Qmlp(
-    X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test,
+    data=data,
+    labels=label_names,
    hidden_layers = [128, 256, 128],
+    test_size=0.3,
    dropout_rate=0      
    )
  # model = QCNN(