refactor: unify module structure and suppress training warnings

- Move canonical implementations to Qfunctions layer (divSet.py, loadData.py, saveToXlsx.py)
- Remove duplicate compatibility shims (loaData.py, saveToxlsx.py)
- Remove redundant Qtorch/Functions/ directory
- Add zero_division=0 to sklearn metrics to suppress UndefinedMetricWarning
- Set matplotlib backend to Agg to eliminate Wayland/Qt warnings
- Update all imports to use canonical module paths
This commit is contained in:
newbieQQ 2026-03-29 12:48:41 +08:00
parent 353af6ab45
commit f6f839ebc0
13 changed files with 299 additions and 304 deletions

15
.vscode/launch.json vendored
View File

@ -1,15 +0,0 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current this project",
"type": "debugpy",
"request": "launch",
"program": "main.py",
"console": "integratedTerminal"
}
]
}

View File

@ -0,0 +1,5 @@
from .divSet import divSet
from .loadData import load_data
from .saveToXlsx import save_to_xlsx
__all__ = ["divSet", "load_data", "save_to_xlsx"]

45
Qfunctions/divSet.py Normal file
View File

@ -0,0 +1,45 @@
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
def divSet(data, labels=None, test_size=0.2, random_state=None):
    """Split a labelled table into scaled train/test features and encoded labels.

    The last column of ``data`` is treated as the label; every other column
    is treated as a feature.

    Args:
        data: pandas DataFrame whose last column holds the class label.
        labels: Optional collection of all class names. When given and
            non-empty, the encoder is fitted on it, so classes that do not
            occur in ``data`` are still known to the encoder. Otherwise the
            encoder is fitted on the label column itself.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.

    Returns:
        ``(X_train, X_test, y_train, y_test, encoder)`` where the feature
        arrays are standardized, the label arrays are integer-encoded and
        ``encoder`` is the fitted ``LabelEncoder``.
    """
    encoder = LabelEncoder()

    # The last column is the label.
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]

    # Test explicitly for None/emptiness: a bare ``if labels:`` raises
    # "truth value of an array is ambiguous" for NumPy arrays and Series.
    if labels is not None and len(labels) > 0:
        encoder.fit(labels)
    else:
        encoder.fit(y)

    # Prefer a stratified split so that every class shows up in both the
    # training and the test set whenever possible.
    stratify_target = y if y.nunique() > 1 else None
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state, stratify=stratify_target
        )
    except ValueError:
        # Stratification can fail (e.g. too few samples in a class); fall
        # back to a plain random split in that case.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

    # Standardize features; the scaler is fitted on the training part only.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Encode labels as integers.
    y_train = encoder.transform(y_train.values)
    y_test = encoder.transform(y_test.values)

    return X_train, X_test, y_train, y_test, encoder
__all__ = ["divSet"]

View File

@ -4,6 +4,7 @@ import pandas as pd
STATIC_PATH = './Static' STATIC_PATH = './Static'
# 从文件夹中读取所有xlsx文件每个文件对应一个label # 从文件夹中读取所有xlsx文件每个文件对应一个label
# labelNames为label的名字如果不提供则默认为文件名 # labelNames为label的名字如果不提供则默认为文件名
def load_data(folder, labelNames, isDir=True, fileClass='xlsx'): def load_data(folder, labelNames, isDir=True, fileClass='xlsx'):
@ -21,14 +22,15 @@ def load_data(folder, labelNames, isDir=True, fileClass='xlsx'):
if not os.path.isdir(folder): if not os.path.isdir(folder):
raise ValueError(f"The folder '{folder}' does not exist.") raise ValueError(f"The folder '{folder}' does not exist.")
data = None
if not isDir: if not isDir:
data = load_from_file(folder=folder, labelNames=labelNames, fileClass=fileClass) data = load_from_file(folder=folder, labelNames=labelNames, fileClass=fileClass)
else: else:
data = load_from_folder(folder=folder, labelNames=labelNames, fileClass=fileClass) data = load_from_folder(folder=folder, labelNames=labelNames, fileClass=fileClass)
print(data) print(data)
return data return data
def load_from_folder(folder, labelNames, fileClass): def load_from_folder(folder, labelNames, fileClass):
all_features = [] all_features = []
fileClass = '.' + fileClass fileClass = '.' + fileClass
@ -43,11 +45,14 @@ def load_from_folder(folder, labelNames, fileClass):
features.append(load_xlsx(file_path, labelName, max_row_length, 'zero')) features.append(load_xlsx(file_path, labelName, max_row_length, 'zero'))
if features: if features:
all_features.append(pd.concat(features, ignore_index=True)) all_features.append(pd.concat(features, ignore_index=True))
# 将所有标签的数据合并 # 将所有标签的数据合并
return pd.concat(all_features, ignore_index=True) return pd.concat(all_features, ignore_index=True)
def load_from_file(folder, labelNames, fileClass): def load_from_file(folder, labelNames, fileClass):
# 构建期望的文件名label + .扩展名并在目录中进行健壮匹配去除零宽字符、Unicode 规范化、大小写不敏感) # 构建期望的文件名label + .扩展名),并在目录中进行健壮匹配
# 去除零宽字符、Unicode 规范化、大小写不敏感)
expected_names = [f"{labelName}.{fileClass}" for labelName in labelNames] expected_names = [f"{labelName}.{fileClass}" for labelName in labelNames]
actual_file_names = [] actual_file_names = []
@ -75,27 +80,17 @@ def load_from_file(folder, labelNames, fileClass):
file_path = os.path.join(folder, fileName) file_path = os.path.join(folder, fileName)
features = load_xlsx(file_path, labelNames[i], max_row_length, 'zero') features = load_xlsx(file_path, labelNames[i], max_row_length, 'zero')
all_features.append(features) all_features.append(features)
return pd.concat(all_features, ignore_index = True) return pd.concat(all_features, ignore_index=True)
def load_xlsx(fileName, labelName, max_row_length = 1000, fill_rule = None): def load_xlsx(fileName, labelName, max_row_length=1000, fill_rule=None):
df = pd.read_excel(fileName) df = pd.read_excel(fileName)
# 提取偶数列 # 提取偶数列
features = df.iloc[0:, 1::2] features = df.iloc[0:, 1::2]
# ## 复制 features DataFrame
# features_copy = features.copy()
# ## 使用 pd.concat 来追加副本到原始 DataFrame
# features = pd.concat([features, features_copy], ignore_index=True, axis=1)
# 计算变化率
# first_value = features.iloc[0, :] # 获取第一行的数据
# features_pct_change = (features - first_value) / first_value
# features = features_pct_change
features.dropna(inplace=True) features.dropna(inplace=True)
features.reset_index(drop=True, inplace=True) features.reset_index(drop=True, inplace=True)
features = features.T features = features.T
# 补全每一行到指定长度 # 补全每一行到指定长度
@ -103,26 +98,28 @@ def load_xlsx(fileName, labelName, max_row_length = 1000, fill_rule = None):
# 获取实际的列数 # 获取实际的列数
actual_columns = features.shape[1] actual_columns = features.shape[1]
features['label'] = labelName features['label'] = labelName
# 使用实际的列数来创建列名
features.columns = [f'feature{i+1}' for i in range(actual_columns)] + ['label'] features.columns = [f'feature{i+1}' for i in range(actual_columns)] + ['label']
return features return features
def fill_to_len(row, length = 1000, rule = None):
fill_value = 0
def fill_to_len(row, length=1000, rule=None):
if len(row) >= length:
return row.iloc[:length].reset_index(drop=True)
fill_value = 0
if rule == 'min': if rule == 'min':
fill_value = row.min() fill_value = row.min()
elif rule == 'mean': elif rule == 'mean':
fill_value = row.mean() fill_value = row.mean()
elif rule == 'zero': elif rule == 'zero':
fill_value = 0 fill_value = 0
fill_values = pd.Series([fill_value] * (length - len(row)))
fill_values = pd.Series([fill_value] * (length - len(row)))
return pd.concat([row, fill_values], ignore_index=True) return pd.concat([row, fill_values], ignore_index=True)
def get_max_row_len(folder, filenames): def get_max_row_len(folder, filenames):
max_len = 0 max_len = 0
for filename in filenames: for filename in filenames:
@ -130,7 +127,6 @@ def get_max_row_len(folder, filenames):
max_len = max(max_len, df.shape[0]) max_len = max(max_len, df.shape[0])
return max_len return max_len
__all__ = ['load_data']
# ---------- 内部工具函数:处理包含零宽字符或不同 Unicode 形式的文件名匹配 ---------- # ---------- 内部工具函数:处理包含零宽字符或不同 Unicode 形式的文件名匹配 ----------
@ -139,28 +135,28 @@ def _strip_zero_width(s: str) -> str:
if not isinstance(s, str): if not isinstance(s, str):
return s return s
return s.translate({ return s.translate({
0x200B: None, # ZERO WIDTH SPACE 0x200B: None,
0x200C: None, # ZERO WIDTH NON-JOINER 0x200C: None,
0x200D: None, # ZERO WIDTH JOINER 0x200D: None,
0xFEFF: None, # ZERO WIDTH NO-BREAK SPACE 0xFEFF: None,
}) })
def _canonicalize_name(name: str) -> str: def _canonicalize_name(name: str) -> str:
# 规范化到 NFKC并移除零宽字符 # 规范化到 NFKC并移除零宽字符
name = unicodedata.normalize('NFKC', name) name = unicodedata.normalize('NFKC', name)
name = _strip_zero_width(name) name = _strip_zero_width(name)
return name return name
def _normalize_for_compare(name: str) -> str: def _normalize_for_compare(name: str) -> str:
# 进一步规范化用于宽松比较: # 进一步规范化用于宽松比较
# - 统一大小写
# - 将下划线视为空格(与文件名用下划线代替空格的情况匹配)
# - 折叠所有空白为一个空格,并去除首尾空格
n = _canonicalize_name(name) n = _canonicalize_name(name)
n = n.replace('_', ' ') n = n.replace('_', ' ')
n = ' '.join(n.split()) n = ' '.join(n.split())
return n.lower() return n.lower()
def _find_matching_file(folder: str, expected_name: str): def _find_matching_file(folder: str, expected_name: str):
# 首先进行严格匹配(规范化后相等) # 首先进行严格匹配(规范化后相等)
expected = _canonicalize_name(expected_name) expected = _canonicalize_name(expected_name)
@ -179,10 +175,13 @@ def _find_matching_file(folder: str, expected_name: str):
if _canonicalize_name(f).lower() == expected_lower: if _canonicalize_name(f).lower() == expected_lower:
return f return f
# 宽松策略:将下划线当作空格处理,并折叠空白(用于匹配 "Crocodile grain" vs "Crocodile_grain" # 宽松策略:将下划线当作空格处理,并折叠空白
expected_relaxed = _normalize_for_compare(expected_name) expected_relaxed = _normalize_for_compare(expected_name)
for f in entries: for f in entries:
if _normalize_for_compare(f) == expected_relaxed: if _normalize_for_compare(f) == expected_relaxed:
return f return f
return None return None
__all__ = ['load_data']

165
Qfunctions/saveToXlsx.py Normal file
View File

@ -0,0 +1,165 @@
import os
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
def save_to_xlsx(project_name, file_name, data):
    """Write ``data`` to ``Result/<project_name>/<file_name>.xlsx`` and plot it.

    Args:
        project_name: Sub-folder of ``Result`` that receives the output; it is
            created when missing.
        file_name: Base name of the spreadsheet (without extension). It is
            also passed to ``save_to_pic`` to choose the picture to draw.
        data: Object exposing ``to_excel`` (e.g. a pandas DataFrame); it is
            written together with its index.
    """
    out_dir = f'Result/{project_name}'
    os.makedirs(out_dir, exist_ok=True)

    xlsx_path = f'{out_dir}/{file_name}.xlsx'
    data.to_excel(xlsx_path, index=True)
    print('Save successed to ' + xlsx_path)

    # Draw the picture that belongs to this spreadsheet.
    save_to_pic(project_name=project_name, file_name=file_name)
def save_to_pic(project_name, file_name):
    """Draw the picture(s) that belong to an already saved result spreadsheet.

    The plot type is selected by ``file_name``: ``pca_2d``, ``pca_3d``,
    ``acc_and_loss``, ``cm`` or ``cmn``. Any other name only prints a notice.

    Args:
        project_name: Sub-folder of ``Result`` holding the spreadsheet.
        file_name: Base name of the spreadsheet (without extension).
    """
    result_dir = f'Result/{project_name}'
    os.makedirs(result_dir, exist_ok=True)

    xlsx_path = f'{result_dir}/{file_name}.xlsx'
    png_path = f'{result_dir}/{file_name}.png'

    if file_name in ('pca_2d', 'pca_3d'):
        painter = draw_pca_2d if file_name == 'pca_2d' else draw_pca_3d
        painter(xlsx_path)
        print('Save successed to ' + png_path)
    elif file_name == 'acc_and_loss':
        draw_epoch_data(xlsx_path)
        draw_last_epoch_bar_chart(xlsx_path)
        print('Save successed to line graph and bar graph')
    elif file_name in ('cm', 'cmn'):
        # Raw and normalized confusion matrices share the same renderer.
        draw_and_save_cm(xlsx_path)
        print('Save successed cm' if file_name == 'cm' else 'Save successed cmn')
    else:
        print('unknow picture type')
def draw_pca_2d(file_path):
    """Render a 2-D PCA scatter plot from a spreadsheet and save it as a PNG.

    Args:
        file_path: Path to an ``.xlsx`` file with ``PC1``, ``PC2`` and
            ``labels`` columns. The picture path is ``file_path`` with
            ``.xlsx`` replaced by ``.png``.
    """
    points = pd.read_excel(file_path)
    png_path = file_path.replace('.xlsx', '.png')

    plt.figure(figsize=(8, 6))
    # Points are colored by their value in the ``labels`` column.
    plt.scatter(
        points['PC1'],
        points['PC2'],
        c=points['labels'],
        cmap='viridis',
        edgecolor='k',
        alpha=0.6,
    )
    plt.title('2D PCA')
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.colorbar(label='Labels')

    plt.savefig(png_path)
    plt.close()
def draw_pca_3d(file_path):
    """Render a 3-D PCA scatter plot from a spreadsheet and save it as a PNG.

    Args:
        file_path: Path to an ``.xlsx`` file with ``PC1``, ``PC2``, ``PC3``
            and ``labels`` columns. The picture path is ``file_path`` with
            ``.xlsx`` replaced by ``.png``.
    """
    df = pd.read_excel(file_path)

    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111, projection='3d')
    # Points are colored by their value in the ``labels`` column.
    scatter = ax.scatter(
        df['PC1'], df['PC2'], df['PC3'],
        c=df['labels'], cmap='viridis', edgecolor='k', alpha=0.6,
    )
    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    ax.set_zlabel('PC3')
    ax.set_title('3D PCA')
    fig.colorbar(scatter, ax=ax, label='Labels')

    plt.savefig(file_path.replace('.xlsx', '.png'))
    # pyplot keeps every figure alive until it is closed; without this call
    # each invocation would leave one more open figure behind.
    plt.close(fig)
def draw_epoch_data(file_path):
    """Plot per-epoch training metrics as a 2x3 grid of line charts.

    Args:
        file_path: Path to an ``.xlsx`` file with the columns ``epoch``,
            ``train_loss``, ``train_accuracy``, ``test_accuracy``,
            ``f1_score``, ``precision`` and ``recall``. The picture path is
            ``file_path`` with ``.xlsx`` replaced by ``_epoch.png``.
    """
    history = pd.read_excel(file_path)

    epochs = history['epoch']
    loss = history['train_loss']
    # Accuracies are scaled by 100 so they can be shown as percentages.
    train_acc = history['train_accuracy'] * 100
    test_acc = history['test_accuracy'] * 100
    f1 = history['f1_score']
    prec = history['precision']
    rec = history['recall']

    fig, axs = plt.subplots(2, 3, figsize=(18, 12))

    # One entry per panel: (axes, curves, y-axis label, title), where each
    # curve is (series, line format, legend label).
    panels = (
        (axs[0, 0], [(loss, 'b-', 'Train Loss')],
         'Loss', 'Training Loss over Epochs'),
        (axs[0, 1], [(train_acc, 'g-', 'Train Accuracy'),
                     (test_acc, 'r-', 'Test Accuracy')],
         'Accuracy (%)', 'Train and Test Accuracy over Epochs'),
        (axs[0, 2], [(f1, 'm-', 'F1 Score')],
         'F1 Score', 'F1 Score over Epochs'),
        (axs[1, 0], [(prec, 'c-', 'Precision')],
         'Precision', 'Precision over Epochs'),
        (axs[1, 1], [(rec, 'y-', 'Recall')],
         'Recall', 'Recall over Epochs'),
    )
    for ax, curves, y_label, title in panels:
        for series, line_format, legend_label in curves:
            ax.plot(epochs, series, line_format, label=legend_label)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.legend()

    # The sixth cell of the grid is intentionally left blank.
    axs[1, 2].axis('off')

    plt.tight_layout()
    plt.savefig(file_path.replace('.xlsx', '_epoch.png'))
    plt.close()
def draw_last_epoch_bar_chart(file_path):
    """Draw a bar chart of the metrics recorded in the last row of a sheet.

    Args:
        file_path: Path to an ``.xlsx`` file with the columns ``train_loss``,
            ``train_accuracy``, ``test_accuracy``, ``f1_score``,
            ``precision`` and ``recall``. The picture path is ``file_path``
            with ``.xlsx`` replaced by ``_last_epoch_bar.png``.
    """
    final_row = pd.read_excel(file_path).iloc[-1]

    # (column name, bar label, bar color) in display order.
    bars = [
        ('train_loss', 'Train Loss', 'blue'),
        ('train_accuracy', 'Train Accuracy', 'green'),
        ('test_accuracy', 'Test Accuracy', 'red'),
        ('f1_score', 'F1 Score', 'magenta'),
        ('precision', 'Precision', 'cyan'),
        ('recall', 'Recall', 'yellow'),
    ]
    # Only the two accuracy columns are scaled by 100.
    scaled_columns = ('train_accuracy', 'test_accuracy')

    labels = [bar_label for _, bar_label, _ in bars]
    colors = [bar_color for _, _, bar_color in bars]
    values = [
        final_row[column] * 100 if column in scaled_columns else final_row[column]
        for column, _, _ in bars
    ]

    plt.figure(figsize=(10, 6))
    plt.bar(labels, values, color=colors)
    plt.xlabel('Metrics')
    plt.ylabel('Values')
    plt.title('Last Epoch Metrics')
    plt.ylim(bottom=0)

    # Print each value slightly above its bar.
    for position, height in enumerate(values):
        plt.text(position, height + 0.01, f'{height:.2f}', ha='center')

    plt.tight_layout()
    plt.savefig(file_path.replace('.xlsx', '_last_epoch_bar.png'))
    plt.close()
def draw_and_save_cm(file_path):
    """Render a confusion matrix stored in a spreadsheet and save it as a PNG.

    Args:
        file_path: Path to an ``.xlsx`` file holding the matrix. The first
            column is skipped (presumably the index written by
            ``to_excel(index=True)`` - confirm against the writer); the
            remaining column headers are used as class labels. The picture
            path is ``file_path`` with ``.xlsx`` replaced by ``.png``.
    """
    df_cm = pd.read_excel(file_path)

    labels = df_cm.columns[1:].tolist()
    cm = df_cm.values[:, 1:]
    tick_positions = np.arange(len(labels))

    fig, axs = plt.subplots(1, 2, figsize=(12, 6))

    matrix_ax = axs[0]
    matrix_ax.imshow(cm, interpolation='nearest', cmap='Blues')
    matrix_ax.set_title('Confusion Matrix')
    matrix_ax.set_xlabel('Predicted')
    matrix_ax.set_ylabel('True')
    matrix_ax.set_xticks(tick_positions)
    matrix_ax.set_yticks(tick_positions)
    matrix_ax.set_xticklabels(labels)
    matrix_ax.set_yticklabels(labels)

    # Write the value of every cell at its center (x = column, y = row).
    for i in range(len(labels)):
        for j in range(len(labels)):
            matrix_ax.text(j, i, f'{cm[i, j]}', ha='center', va='center')

    # Only the first panel is drawn; hide the second one so the saved picture
    # does not contain an empty framed axes.
    axs[1].axis('off')

    plt.tight_layout()
    plt.savefig(file_path.replace('.xlsx', '.png'))
    plt.close()

View File

@ -1,173 +0,0 @@
import os
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
def save_to_xlsx(project_name, file_name, data):
    """Write ``data`` to ``Result/<project_name>/<file_name>.xlsx`` and plot it.

    Args:
        project_name: Sub-folder of ``Result`` that receives the output; it is
            created when missing.
        file_name: Base name of the spreadsheet (without extension). It is
            also passed to ``save_to_pic`` to choose the picture to draw.
        data: Object exposing ``to_excel`` (e.g. a pandas DataFrame); it is
            written together with its index.
    """
    out_dir = f'Result/{project_name}'
    os.makedirs(out_dir, exist_ok=True)

    xlsx_path = f'{out_dir}/{file_name}.xlsx'
    data.to_excel(xlsx_path, index=True)
    print("Save successed to " + xlsx_path)

    # Draw the picture that belongs to this spreadsheet.
    save_to_pic(project_name=project_name, file_name=file_name)
def save_to_pic(project_name, file_name):
    """Draw the picture(s) that belong to an already saved result spreadsheet.

    The plot type is selected by ``file_name``: ``pca_2d``, ``pca_3d``,
    ``acc_and_loss``, ``cm`` or ``cmn``. Any other name only prints a notice.

    Args:
        project_name: Sub-folder of ``Result`` holding the spreadsheet.
        file_name: Base name of the spreadsheet (without extension).
    """
    result_dir = f'Result/{project_name}'
    os.makedirs(result_dir, exist_ok=True)

    xlsx_path = f'{result_dir}/{file_name}.xlsx'
    png_path = f'{result_dir}/{file_name}.png'

    if file_name in ('pca_2d', 'pca_3d'):
        painter = draw_pca_2d if file_name == 'pca_2d' else draw_pca_3d
        painter(xlsx_path)
        print("Save successed to " + png_path)
    elif file_name == 'acc_and_loss':
        draw_epoch_data(xlsx_path)
        draw_last_epoch_bar_chart(xlsx_path)
        print("Save successed to line graph and bar graph")
    elif file_name in ('cm', 'cmn'):
        # Raw and normalized confusion matrices share the same renderer.
        draw_and_save_cm(xlsx_path)
        print("Save successed cm" if file_name == 'cm' else "Save successed cmn")
    else:
        print("unknow picture type")
def draw_pca_2d(file_path):
    """Render a 2-D PCA scatter plot from a spreadsheet and save it as a PNG.

    Args:
        file_path: Path to an ``.xlsx`` file with ``PC1``, ``PC2`` and
            ``labels`` columns. The picture path is ``file_path`` with
            ``.xlsx`` replaced by ``.png``.
    """
    points = pd.read_excel(file_path)
    png_path = file_path.replace('.xlsx', '.png')

    plt.figure(figsize=(8, 6))
    # Points are colored by their value in the ``labels`` column.
    plt.scatter(
        points['PC1'],
        points['PC2'],
        c=points['labels'],
        cmap='viridis',
        edgecolor='k',
        alpha=0.6,
    )
    plt.title('2D PCA')
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.colorbar(label='Labels')

    plt.savefig(png_path)
    plt.close()
def draw_pca_3d(file_path):
    """Render a 3-D PCA scatter plot from a spreadsheet and save it as a PNG.

    Args:
        file_path: Path to an ``.xlsx`` file with ``PC1``, ``PC2``, ``PC3``
            and ``labels`` columns. The picture path is ``file_path`` with
            ``.xlsx`` replaced by ``.png``.
    """
    df = pd.read_excel(file_path)

    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111, projection='3d')
    # Points are colored by their value in the ``labels`` column.
    scatter = ax.scatter(
        df['PC1'], df['PC2'], df['PC3'],
        c=df['labels'], cmap='viridis', edgecolor='k', alpha=0.6,
    )
    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    ax.set_zlabel('PC3')
    ax.set_title('3D PCA')
    fig.colorbar(scatter, ax=ax, label='Labels')

    plt.savefig(file_path.replace('.xlsx', '.png'))
    # pyplot keeps every figure alive until it is closed; without this call
    # each invocation would leave one more open figure behind.
    plt.close(fig)
def draw_epoch_data(file_path):
    """Plot per-epoch training metrics as a 2x3 grid of line charts.

    Args:
        file_path: Path to an ``.xlsx`` file with the columns ``epoch``,
            ``train_loss``, ``train_accuracy``, ``test_accuracy``,
            ``f1_score``, ``precision`` and ``recall``. The picture path is
            ``file_path`` with ``.xlsx`` replaced by ``_epoch.png``.
    """
    history = pd.read_excel(file_path)

    epochs = history['epoch']
    loss = history['train_loss']
    # Accuracies are scaled by 100 so they can be shown as percentages.
    train_acc = history['train_accuracy'] * 100
    test_acc = history['test_accuracy'] * 100
    f1 = history['f1_score']
    prec = history['precision']
    rec = history['recall']

    fig, axs = plt.subplots(2, 3, figsize=(18, 12))

    # One entry per panel: (axes, curves, y-axis label, title), where each
    # curve is (series, line format, legend label).
    panels = (
        # Line chart: training loss.
        (axs[0, 0], [(loss, 'b-', 'Train Loss')],
         'Loss', 'Training Loss over Epochs'),
        # Line chart: training and test accuracy.
        (axs[0, 1], [(train_acc, 'g-', 'Train Accuracy'),
                     (test_acc, 'r-', 'Test Accuracy')],
         'Accuracy (%)', 'Train and Test Accuracy over Epochs'),
        # Line chart: F1 score.
        (axs[0, 2], [(f1, 'm-', 'F1 Score')],
         'F1 Score', 'F1 Score over Epochs'),
        # Line chart: precision.
        (axs[1, 0], [(prec, 'c-', 'Precision')],
         'Precision', 'Precision over Epochs'),
        # Line chart: recall.
        (axs[1, 1], [(rec, 'y-', 'Recall')],
         'Recall', 'Recall over Epochs'),
    )
    for ax, curves, y_label, title in panels:
        for series, line_format, legend_label in curves:
            ax.plot(epochs, series, line_format, label=legend_label)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.legend()

    # Blank cell, available for an extra chart if one is needed.
    axs[1, 2].axis('off')

    plt.tight_layout()
    plt.savefig(file_path.replace('.xlsx', '_epoch.png'))
    plt.close()
def draw_last_epoch_bar_chart(file_path):
    """Draw a bar chart of the metrics recorded in the last row of a sheet.

    Args:
        file_path: Path to an ``.xlsx`` file with the columns ``train_loss``,
            ``train_accuracy``, ``test_accuracy``, ``f1_score``,
            ``precision`` and ``recall``. The picture path is ``file_path``
            with ``.xlsx`` replaced by ``_last_epoch_bar.png``.
    """
    final_row = pd.read_excel(file_path).iloc[-1]

    # (column name, bar label, bar color) in display order.
    bars = [
        ('train_loss', 'Train Loss', 'blue'),
        ('train_accuracy', 'Train Accuracy', 'green'),
        ('test_accuracy', 'Test Accuracy', 'red'),
        ('f1_score', 'F1 Score', 'magenta'),
        ('precision', 'Precision', 'cyan'),
        ('recall', 'Recall', 'yellow'),
    ]
    # Only the two accuracy columns are scaled by 100.
    scaled_columns = ('train_accuracy', 'test_accuracy')

    labels = [bar_label for _, bar_label, _ in bars]
    colors = [bar_color for _, _, bar_color in bars]
    values = [
        final_row[column] * 100 if column in scaled_columns else final_row[column]
        for column, _, _ in bars
    ]

    plt.figure(figsize=(10, 6))
    plt.bar(labels, values, color=colors)
    plt.xlabel('Metrics')
    plt.ylabel('Values')
    plt.title('Last Epoch Metrics')
    plt.ylim(bottom=0)

    # Print each value slightly above its bar.
    for position, height in enumerate(values):
        plt.text(position, height + 0.01, f'{height:.2f}', ha='center')

    plt.tight_layout()
    plt.savefig(file_path.replace('.xlsx', '_last_epoch_bar.png'))
    plt.close()
def draw_and_save_cm(file_path):
    """Render a confusion matrix stored in a spreadsheet and save it as a PNG.

    Args:
        file_path: Path to an ``.xlsx`` file holding the matrix. The first
            column is skipped (presumably the index written by
            ``to_excel(index=True)`` - confirm against the writer); the
            remaining column headers are used as class labels. The picture
            path is ``file_path`` with ``.xlsx`` replaced by ``.png``.
    """
    # Read the Excel file.
    df_cm = pd.read_excel(file_path)

    # Class labels come from the column headers after the first column.
    labels = df_cm.columns[1:].tolist()
    # Matrix values: every column except the first.
    cm = df_cm.values[:, 1:]
    tick_positions = np.arange(len(labels))

    fig, axs = plt.subplots(1, 2, figsize=(12, 6))

    matrix_ax = axs[0]
    matrix_ax.imshow(cm, interpolation='nearest', cmap='Blues')
    matrix_ax.set_title('Confusion Matrix')
    matrix_ax.set_xlabel('Predicted')
    matrix_ax.set_ylabel('True')
    matrix_ax.set_xticks(tick_positions)
    matrix_ax.set_yticks(tick_positions)
    matrix_ax.set_xticklabels(labels)
    matrix_ax.set_yticklabels(labels)

    # Write the value of every cell at its center (x = column, y = row).
    for i in range(len(labels)):
        for j in range(len(labels)):
            matrix_ax.text(j, i, f'{cm[i, j]}', ha='center', va='center')

    # Only the first panel is drawn; hide the second one so the saved picture
    # does not contain an empty framed axes.
    axs[1].axis('off')

    # Adjust the layout and save the picture.
    plt.tight_layout()
    plt.savefig(file_path.replace('.xlsx', '.png'))
    plt.close()

View File

@ -1,28 +0,0 @@
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
def divSet(data, labels=None, test_size=0.2, random_state=None):
    """Split a labelled table into scaled train/test features and encoded labels.

    The last column of ``data`` is treated as the label; every other column
    is treated as a feature.

    Args:
        data: pandas DataFrame whose last column holds the class label.
        labels: Optional collection of all class names. When given and
            non-empty, the encoder is fitted on it; otherwise it is fitted on
            the label column itself.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.

    Returns:
        ``(X_train, X_test, y_train, y_test, encoder)`` where the feature
        arrays are standardized, the label arrays are integer-encoded and
        ``encoder`` is the fitted ``LabelEncoder``.
    """
    encoder = LabelEncoder()

    # The last column is the label.
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]

    # Test explicitly for None/emptiness: a bare ``if labels:`` raises
    # "truth value of an array is ambiguous" for NumPy arrays and Series.
    if labels is not None and len(labels) > 0:
        # Only the fitted encoder is needed; the encoded copy of ``labels``
        # returned by fit_transform() was never used.
        encoder.fit(labels)
    else:
        encoder.fit(y)

    # Split into training and test sets.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Standardize features; the scaler is fitted on the training part only.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Encode labels. LabelEncoder works on 1-D label arrays, so the values
    # are passed as-is instead of being reshaped into a column vector.
    y_train = encoder.transform(y_train.values)
    y_test = encoder.transform(y_test.values)

    return X_train, X_test, y_train, y_test, encoder

View File

@ -15,7 +15,7 @@ class Qmlp(Qnn):
super(Qmlp, self).__init__(data=data, labels=labels, test_size=test_size, random_state=random_state) super(Qmlp, self).__init__(data=data, labels=labels, test_size=test_size, random_state=random_state)
input_size = self.X_train.shape[1] input_size = self.X_train.shape[1]
num_classes = len(np.unique(self.y_train)) num_classes = len(labels) if labels is not None else int(np.max(self.y_train)) + 1
self.layers = nn.ModuleList() self.layers = nn.ModuleList()
# 连接输入层和第一个隐藏层 # 连接输入层和第一个隐藏层

View File

@ -1,12 +1,12 @@
import torch import torch
import torch.nn as nn import torch.nn as nn
import numpy as np
import pandas as pd import pandas as pd
from sklearn.decomposition import PCA from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
from torch.utils.data import DataLoader, TensorDataset from torch.utils.data import DataLoader, TensorDataset
from Qtorch import divSet as DS from Qfunctions.divSet import divSet as DS
# from Qfunctions.saveToxlsx import save_to_xlsx as stx
class Qnn(nn.Module): class Qnn(nn.Module):
@ -45,11 +45,9 @@ class Qnn(nn.Module):
# 将data转换为tensor形式 # 将data转换为tensor形式
X_train_tensor = torch.tensor(self.X_train, dtype=torch.float32) X_train_tensor = torch.tensor(self.X_train, dtype=torch.float32)
self.y_train = self.LABEL_ENCODER.fit_transform(self.y_train)
y_train_tensor = torch.tensor(self.y_train, dtype=torch.long) y_train_tensor = torch.tensor(self.y_train, dtype=torch.long)
X_test_tensor = torch.tensor(self.X_test, dtype=torch.float32) X_test_tensor = torch.tensor(self.X_test, dtype=torch.float32)
self.y_test = self.LABEL_ENCODER.transform(self.y_test)
y_test_tensor = torch.tensor(self.y_test, dtype=torch.long) y_test_tensor = torch.tensor(self.y_test, dtype=torch.long)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor) train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
@ -116,9 +114,9 @@ class Qnn(nn.Module):
all_prob.extend(prob.cpu().numpy()) all_prob.extend(prob.cpu().numpy())
test_accuracy = correct_test / total_test test_accuracy = correct_test / total_test
f1 = f1_score(all_labels, all_predicted, average='macro') f1 = f1_score(all_labels, all_predicted, average='macro', zero_division=0)
precision = precision_score(all_labels, all_predicted, average='macro') precision = precision_score(all_labels, all_predicted, average='macro', zero_division=0)
recall = recall_score(all_labels, all_predicted, average='macro') recall = recall_score(all_labels, all_predicted, average='macro', zero_division=0)
if (epoch + 1) % 10 == 0: if (epoch + 1) % 10 == 0:
print('===============================================') print('===============================================')
@ -148,8 +146,10 @@ class Qnn(nn.Module):
break break
# cmn为归一化矩阵 # cmn为归一化矩阵
self.cm = confusion_matrix(all_labels, all_predicted) # Keep matrix dimensions stable even when some classes do not appear in this split.
self.cmn = confusion_matrix(all_labels, all_predicted, normalize='true') cm_labels = np.arange(len(self.labels)) if self.labels is not None else None
self.cm = confusion_matrix(all_labels, all_predicted, labels=cm_labels)
self.cmn = confusion_matrix(all_labels, all_predicted, labels=cm_labels, normalize='true')
print(self.cm) print(self.cm)
return return

View File

@ -1,3 +1,2 @@
# Qtorch/__init__.py # Qtorch/__init__.py
from .Functions.divSet import divSet
from .Models import Qnn, Qmlp, Qcnn from .Models import Qnn, Qmlp, Qcnn

View File

@ -143,8 +143,8 @@ Wood <-> Wood.xlsx 或 Wood/
```python ```python
from Qtorch.Models.Qmlp import Qmlp from Qtorch.Models.Qmlp import Qmlp
from Qfunctions.divSet import divSet from Qfunctions.divSet import divSet
from Qfunctions.loaData import load_data from Qfunctions.loadData import load_data
from Qfunctions.saveToxlsx import save_to_xlsx from Qfunctions.saveToXlsx import save_to_xlsx
projet_name = '20241009MaterialDiv' projet_name = '20241009MaterialDiv'
label_names = ['Acrlic', 'Ecoflex', 'PDMS', 'PLA', 'Wood'] label_names = ['Acrlic', 'Ecoflex', 'PDMS', 'PLA', 'Wood']

12
main.py
View File

@ -1,7 +1,6 @@
from Qtorch.Models.Qmlp import Qmlp from Qtorch.Models.Qmlp import Qmlp
from Qfunctions.divSet import divSet from Qfunctions.loadData import load_data
from Qfunctions.loaData import load_data from Qfunctions.saveToXlsx import save_to_xlsx as save_to_xlsx
from Qfunctions.saveToxlsx import save_to_xlsx as save_to_xlsx
def main(): def main():
# 输入元数据文件夹名称 # 输入元数据文件夹名称
@ -11,13 +10,12 @@ def main():
label_names = list(range(10)) label_names = list(range(10))
print(label_names) print(label_names)
data = load_data(projet_name, label_names, isDir=False, fileClass='xlsx') data = load_data(projet_name, label_names, isDir=False, fileClass='xlsx')
X_train, X_test, y_train, y_test, encoder = divSet(
data=data, labels=label_names, test_size= 0.3
)
model = Qmlp( model = Qmlp(
X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test, data=data,
labels=label_names,
hidden_layers = [128, 256, 128], hidden_layers = [128, 256, 128],
test_size=0.3,
dropout_rate=0 dropout_rate=0
) )
# model = QCNN( # model = QCNN(