refactor: unify module structure and suppress training warnings
- Move canonical implementations to Qfunctions layer (divSet.py, loadData.py, saveToXlsx.py)
- Remove duplicate compatibility shims (loaData.py, saveToxlsx.py)
- Remove redundant Qtorch/Functions/ directory
- Add zero_division=0 to sklearn metrics to suppress UndefinedMetricWarning
- Set matplotlib backend to Agg to eliminate Wayland/Qt warnings
- Update all imports to use canonical module paths
This commit is contained in:
parent 353af6ab45
commit f6f839ebc0
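
For orientation, a minimal sketch of what caller code looks like after this refactor. The import paths follow the commit message and the backend call mirrors the new saveToXlsx.py below, so treat this as illustrative rather than code taken from the repository.

```python
# Canonical import style after the refactor (assumes the repository root is on PYTHONPATH).
import matplotlib
matplotlib.use('Agg')  # headless backend, so training runs emit no Wayland/Qt warnings

from Qfunctions.divSet import divSet
from Qfunctions.loadData import load_data
from Qfunctions.saveToXlsx import save_to_xlsx
```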
@@ -1,15 +0,0 @@
-{
-    // Use IntelliSense to learn about possible attributes.
-    // Hover to view descriptions of existing attributes.
-    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
-    "version": "0.2.0",
-    "configurations": [
-        {
-            "name": "Python Debugger: Current this project",
-            "type": "debugpy",
-            "request": "launch",
-            "program": "main.py",
-            "console": "integratedTerminal"
-        }
-    ]
-}

@@ -0,0 +1,5 @@
+from .divSet import divSet
+from .loadData import load_data
+from .saveToXlsx import save_to_xlsx
+
+__all__ = ["divSet", "load_data", "save_to_xlsx"]

@@ -0,0 +1,45 @@
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+
+
+def divSet(data, labels=None, test_size=0.2, random_state=None):
+    """Split data, scale features, and encode labels.
+
+    This module is the canonical location for dataset splitting utilities.
+    """
+    encoder = LabelEncoder()
+
+    # The last column is the label
+    X = data.iloc[:, :-1]
+    y = data.iloc[:, -1]
+
+    if labels:
+        encoder.fit(labels)
+    else:
+        encoder.fit(y)
+
+    # Prefer stratified sampling so that every class appears in both the training and test sets.
+    stratify_target = y if y.nunique() > 1 else None
+    try:
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=test_size, random_state=random_state, stratify=stratify_target
+        )
+    except ValueError:
+        # If stratification fails (e.g. too few samples per class), fall back to a plain random split.
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=test_size, random_state=random_state
+        )
+
+    # Standardize the features
+    scaler = StandardScaler()
+    X_train = scaler.fit_transform(X_train)
+    X_test = scaler.transform(X_test)
+
+    # Encode the labels
+    y_train = encoder.transform(y_train.values)
+    y_test = encoder.transform(y_test.values)
+
+    return X_train, X_test, y_train, y_test, encoder
+
+
+__all__ = ["divSet"]

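To make the new signature concrete, a small usage sketch with invented toy data; the column names and values are hypothetical, only the call shape follows the file added above.

```python
import pandas as pd
from Qfunctions.divSet import divSet

df = pd.DataFrame({
    'f1': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
    'f2': [1.0, 0.9, 0.8, 0.7, 0.6, 0.5],
    'label': ['A', 'A', 'A', 'B', 'B', 'B'],  # last column is treated as the label
})

X_train, X_test, y_train, y_test, encoder = divSet(df, labels=['A', 'B'], test_size=0.3, random_state=0)
print(X_train.shape, y_train)             # scaled features, integer-encoded labels
print(encoder.inverse_transform(y_test))  # map encoded labels back to their names
```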
@@ -4,6 +4,7 @@ import pandas as pd

 STATIC_PATH = './Static'


 # Read all xlsx files from the folder; each file corresponds to one label
 # labelNames gives the label names; if not provided, the file names are used by default
 def load_data(folder, labelNames, isDir=True, fileClass='xlsx'):

@@ -21,14 +22,15 @@ def load_data(folder, labelNames, isDir=True, fileClass='xlsx'):
     if not os.path.isdir(folder):
         raise ValueError(f"The folder '{folder}' does not exist.")

-    data = None
     if not isDir:
         data = load_from_file(folder=folder, labelNames=labelNames, fileClass=fileClass)
     else:
         data = load_from_folder(folder=folder, labelNames=labelNames, fileClass=fileClass)

     print(data)
     return data


 def load_from_folder(folder, labelNames, fileClass):
     all_features = []
     fileClass = '.' + fileClass

@@ -43,11 +45,14 @@ def load_from_folder(folder, labelNames, fileClass):
             features.append(load_xlsx(file_path, labelName, max_row_length, 'zero'))
         if features:
             all_features.append(pd.concat(features, ignore_index=True))

     # Merge the data for all labels
     return pd.concat(all_features, ignore_index=True)


 def load_from_file(folder, labelNames, fileClass):
-    # Build the expected file names (label + extension) and match them robustly against the directory entries (strip zero-width characters, Unicode normalization, case-insensitive)
+    # Build the expected file names (label + extension) and match them robustly against the directory entries
+    # (strip zero-width characters, Unicode normalization, case-insensitive)
     expected_names = [f"{labelName}.{fileClass}" for labelName in labelNames]

     actual_file_names = []

@@ -83,19 +88,9 @@ def load_xlsx(fileName, labelName, max_row_length = 1000, fill_rule = None):

     # Extract the even-numbered columns
     features = df.iloc[0:, 1::2]
-    # ## Copy the features DataFrame
-    # features_copy = features.copy()
-    # ## Use pd.concat to append the copy to the original DataFrame
-    # features = pd.concat([features, features_copy], ignore_index=True, axis=1)
-
-    # Compute the rate of change
-    # first_value = features.iloc[0, :]  # take the first row
-    # features_pct_change = (features - first_value) / first_value
-    # features = features_pct_change
-
     features.dropna(inplace=True)
     features.reset_index(drop=True, inplace=True)

     features = features.T

     # Pad every row to the specified length

@@ -103,26 +98,28 @@ def load_xlsx(fileName, labelName, max_row_length = 1000, fill_rule = None):

     # Get the actual number of columns
     actual_columns = features.shape[1]

     features['label'] = labelName
-    # Use the actual column count to build the column names
     features.columns = [f'feature{i+1}' for i in range(actual_columns)] + ['label']

     return features

-def fill_to_len(row, length = 1000, rule = None):
-    fill_value = 0

+def fill_to_len(row, length=1000, rule=None):
+    if len(row) >= length:
+        return row.iloc[:length].reset_index(drop=True)
+
+    fill_value = 0
     if rule == 'min':
         fill_value = row.min()
     elif rule == 'mean':
         fill_value = row.mean()
     elif rule == 'zero':
         fill_value = 0
-    fill_values = pd.Series([fill_value] * (length - len(row)))
+
+    fill_values = pd.Series([fill_value] * (length - len(row)))
     return pd.concat([row, fill_values], ignore_index=True)


 def get_max_row_len(folder, filenames):
     max_len = 0
     for filename in filenames:

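The practical effect of the rewritten fill_to_len is that rows already at or beyond `length` are now truncated, where the old version returned them at their original length; a standalone check, assuming the repository root is importable:

```python
import pandas as pd
from Qfunctions.loadData import fill_to_len

short = pd.Series([3.0, 5.0])
long = pd.Series(range(10))

print(fill_to_len(short, length=4, rule='mean').tolist())  # [3.0, 5.0, 4.0, 4.0] - padded with the row mean
print(fill_to_len(long, length=4).tolist())                # [0, 1, 2, 3] - over-length rows are now truncated
```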
@@ -130,7 +127,6 @@ def get_max_row_len(folder, filenames):
         max_len = max(max_len, df.shape[0])
     return max_len

-__all__ = ['load_data']

 # ---------- Internal helpers: file-name matching that tolerates zero-width characters and differing Unicode forms ----------

@@ -139,28 +135,28 @@ def _strip_zero_width(s: str) -> str:
     if not isinstance(s, str):
         return s
     return s.translate({
-        0x200B: None,  # ZERO WIDTH SPACE
-        0x200C: None,  # ZERO WIDTH NON-JOINER
-        0x200D: None,  # ZERO WIDTH JOINER
-        0xFEFF: None,  # ZERO WIDTH NO-BREAK SPACE
+        0x200B: None,
+        0x200C: None,
+        0x200D: None,
+        0xFEFF: None,
     })


 def _canonicalize_name(name: str) -> str:
     # Normalize to NFKC and strip zero-width characters
     name = unicodedata.normalize('NFKC', name)
     name = _strip_zero_width(name)
     return name


 def _normalize_for_compare(name: str) -> str:
-    # Additional normalization for relaxed comparison:
-    # - unify case
-    # - treat underscores as spaces (matches file names that use underscores in place of spaces)
-    # - collapse all whitespace into single spaces and strip leading/trailing whitespace
+    # Additional normalization for relaxed comparison
     n = _canonicalize_name(name)
     n = n.replace('_', ' ')
     n = ' '.join(n.split())
     return n.lower()


 def _find_matching_file(folder: str, expected_name: str):
     # First try strict matching (equality after canonicalization)
     expected = _canonicalize_name(expected_name)

@@ -179,10 +175,13 @@ def _find_matching_file(folder: str, expected_name: str):
         if _canonicalize_name(f).lower() == expected_lower:
             return f

-    # Relaxed strategy: treat underscores as spaces and collapse whitespace (used to match "Crocodile grain" vs "Crocodile_grain")
+    # Relaxed strategy: treat underscores as spaces and collapse whitespace
     expected_relaxed = _normalize_for_compare(expected_name)
     for f in entries:
         if _normalize_for_compare(f) == expected_relaxed:
             return f

     return None


+__all__ = ['load_data']

@@ -0,0 +1,165 @@
+import os
+import pandas as pd
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+import numpy as np
+
+
+def save_to_xlsx(project_name, file_name, data):
+    folder_path = f'Result/{project_name}'
+    os.makedirs(folder_path, exist_ok=True)
+    data.to_excel(f'{folder_path}/{file_name}.xlsx', index=True)
+    print('Save successed to ' + f'{folder_path}/{file_name}.xlsx')
+    save_to_pic(project_name=project_name, file_name=file_name)
+    return
+
+
+def save_to_pic(project_name, file_name):
+    os.makedirs(f'Result/{project_name}', exist_ok=True)
+    if file_name == 'pca_2d':
+        draw_pca_2d(f'Result/{project_name}/{file_name}.xlsx')
+        print('Save successed to ' + f'Result/{project_name}/{file_name}.png')
+    elif file_name == 'pca_3d':
+        draw_pca_3d(f'Result/{project_name}/{file_name}.xlsx')
+        print('Save successed to ' + f'Result/{project_name}/{file_name}.png')
+    elif file_name == 'acc_and_loss':
+        draw_epoch_data(f'Result/{project_name}/{file_name}.xlsx')
+        draw_last_epoch_bar_chart(f'Result/{project_name}/{file_name}.xlsx')
+        print('Save successed to line graph and bar graph')
+    elif file_name == 'cm':
+        draw_and_save_cm(f'Result/{project_name}/{file_name}.xlsx')
+        print('Save successed cm')
+    elif file_name == 'cmn':
+        draw_and_save_cm(f'Result/{project_name}/{file_name}.xlsx')
+        print('Save successed cmn')
+    else:
+        print('unknow picture type')
+
+
+def draw_pca_2d(file_path):
+    df = pd.read_excel(file_path)
+    plt.figure(figsize=(8, 6))
+    plt.scatter(df['PC1'], df['PC2'], c=df['labels'], cmap='viridis', edgecolor='k', alpha=0.6)
+    plt.xlabel('PC1')
+    plt.ylabel('PC2')
+    plt.title('2D PCA')
+    plt.colorbar(label='Labels')
+    plt.savefig(file_path.replace('.xlsx', '.png'))
+    plt.close()
+
+
+def draw_pca_3d(file_path):
+    df = pd.read_excel(file_path)
+    fig = plt.figure(figsize=(8, 6))
+    ax = fig.add_subplot(111, projection='3d')
+    scatter = ax.scatter(df['PC1'], df['PC2'], df['PC3'], c=df['labels'], cmap='viridis', edgecolor='k', alpha=0.6)
+    ax.set_xlabel('PC1')
+    ax.set_ylabel('PC2')
+    ax.set_zlabel('PC3')
+    ax.set_title('3D PCA')
+    fig.colorbar(scatter, ax=ax, label='Labels')
+    plt.savefig(file_path.replace('.xlsx', '.png'))
+
+
+def draw_epoch_data(file_path):
+    df = pd.read_excel(file_path)
+    epochs = df['epoch']
+    train_loss = df['train_loss']
+    train_accuracy = df['train_accuracy'] * 100
+    test_accuracy = df['test_accuracy'] * 100
+    f1_score = df['f1_score']
+    precision = df['precision']
+    recall = df['recall']
+
+    fig, axs = plt.subplots(2, 3, figsize=(18, 12))
+
+    axs[0, 0].plot(epochs, train_loss, 'b-', label='Train Loss')
+    axs[0, 0].set_xlabel('Epoch')
+    axs[0, 0].set_ylabel('Loss')
+    axs[0, 0].set_title('Training Loss over Epochs')
+    axs[0, 0].legend()
+
+    axs[0, 1].plot(epochs, train_accuracy, 'g-', label='Train Accuracy')
+    axs[0, 1].plot(epochs, test_accuracy, 'r-', label='Test Accuracy')
+    axs[0, 1].set_xlabel('Epoch')
+    axs[0, 1].set_ylabel('Accuracy (%)')
+    axs[0, 1].set_title('Train and Test Accuracy over Epochs')
+    axs[0, 1].legend()
+
+    axs[0, 2].plot(epochs, f1_score, 'm-', label='F1 Score')
+    axs[0, 2].set_xlabel('Epoch')
+    axs[0, 2].set_ylabel('F1 Score')
+    axs[0, 2].set_title('F1 Score over Epochs')
+    axs[0, 2].legend()
+
+    axs[1, 0].plot(epochs, precision, 'c-', label='Precision')
+    axs[1, 0].set_xlabel('Epoch')
+    axs[1, 0].set_ylabel('Precision')
+    axs[1, 0].set_title('Precision over Epochs')
+    axs[1, 0].legend()
+
+    axs[1, 1].plot(epochs, recall, 'y-', label='Recall')
+    axs[1, 1].set_xlabel('Epoch')
+    axs[1, 1].set_ylabel('Recall')
+    axs[1, 1].set_title('Recall over Epochs')
+    axs[1, 1].legend()
+
+    axs[1, 2].axis('off')
+
+    plt.tight_layout()
+    plt.savefig(file_path.replace('.xlsx', '_epoch.png'))
+    plt.close()
+
+
+def draw_last_epoch_bar_chart(file_path):
+    df = pd.read_excel(file_path)
+    last_epoch_data = df.iloc[-1]
+
+    metrics = ['train_loss', 'train_accuracy', 'test_accuracy', 'f1_score', 'precision', 'recall']
+    values = [last_epoch_data[metric] for metric in metrics]
+    labels = ['Train Loss', 'Train Accuracy', 'Test Accuracy', 'F1 Score', 'Precision', 'Recall']
+
+    values[1] *= 100
+    values[2] *= 100
+
+    plt.figure(figsize=(10, 6))
+    plt.bar(labels, values, color=['blue', 'green', 'red', 'magenta', 'cyan', 'yellow'])
+    plt.xlabel('Metrics')
+    plt.ylabel('Values')
+    plt.title('Last Epoch Metrics')
+    plt.ylim(bottom=0)
+
+    for i, value in enumerate(values):
+        plt.text(i, value + 0.01, f'{value:.2f}', ha='center')
+
+    plt.tight_layout()
+    plt.savefig(file_path.replace('.xlsx', '_last_epoch_bar.png'))
+    plt.close()
+
+
+def draw_and_save_cm(file_path):
+    df_cm = pd.read_excel(file_path)
+
+    labels = df_cm.columns[1:].tolist()
+    cm = df_cm.values[:, 1:]
+
+    fig, axs = plt.subplots(1, 2, figsize=(12, 6))
+
+    axs[0].imshow(cm, interpolation='nearest', cmap='Blues')
+    axs[0].set_title('Confusion Matrix')
+    axs[0].set_xlabel('Predicted')
+    axs[0].set_ylabel('True')
+    axs[0].set_xticks(np.arange(len(labels)))
+    axs[0].set_yticks(np.arange(len(labels)))
+    axs[0].set_xticklabels(labels)
+    axs[0].set_yticklabels(labels)
+
+    for i in range(len(labels)):
+        for j in range(len(labels)):
+            axs[0].text(j, i, f'{cm[i, j]}', ha='center', va='center')
+
+    plt.tight_layout()
+    plt.savefig(file_path.replace('.xlsx', '.png'))
+    plt.close()

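A hypothetical call against the new canonical module; the DataFrame columns mirror what draw_epoch_data expects, but the project name and values are invented, and writing xlsx assumes an Excel writer such as openpyxl is installed.

```python
import pandas as pd
from Qfunctions.saveToXlsx import save_to_xlsx  # importing this module also selects the Agg backend

history = pd.DataFrame({
    'epoch': [1, 2],
    'train_loss': [0.9, 0.5],
    'train_accuracy': [0.55, 0.80],
    'test_accuracy': [0.50, 0.75],
    'f1_score': [0.48, 0.74],
    'precision': [0.52, 0.76],
    'recall': [0.47, 0.73],
})

# Writes Result/demo_project/acc_and_loss.xlsx, then renders the line and bar charts headlessly.
save_to_xlsx('demo_project', 'acc_and_loss', history)
```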
@@ -1,173 +0,0 @@
-import os
-import pandas as pd
-import matplotlib.pyplot as plt
-from mpl_toolkits.mplot3d import Axes3D
-import numpy as np
-
-def save_to_xlsx(project_name, file_name, data):
-    folder_path = f'Result/{project_name}'
-    os.makedirs(folder_path, exist_ok=True)
-    data.to_excel(f'{folder_path}/{file_name}.xlsx', index=True)
-    print("Save successed to " + f'{folder_path}/{file_name}.xlsx')
-    save_to_pic(project_name=project_name, file_name=file_name)
-    return
-
-def save_to_pic(project_name, file_name):
-    os.makedirs(f'Result/{project_name}', exist_ok=True)
-    if file_name == 'pca_2d':
-        draw_pca_2d(f'Result/{project_name}/{file_name}.xlsx')
-        print("Save successed to " + f'Result/{project_name}/{file_name}.png')
-    elif file_name == 'pca_3d':
-        draw_pca_3d(f'Result/{project_name}/{file_name}.xlsx')
-        print("Save successed to " + f'Result/{project_name}/{file_name}.png')
-    elif file_name == 'acc_and_loss':
-        draw_epoch_data(f'Result/{project_name}/{file_name}.xlsx')
-        draw_last_epoch_bar_chart(f'Result/{project_name}/{file_name}.xlsx')
-        print("Save successed to line graph and bar graph")
-    elif file_name == 'cm':
-        draw_and_save_cm(f'Result/{project_name}/{file_name}.xlsx')
-        print("Save successed cm")
-    elif file_name == 'cmn':
-        draw_and_save_cm(f'Result/{project_name}/{file_name}.xlsx')
-        print("Save successed cmn")
-    else:
-        print("unknow picture type")
-
-
-def draw_pca_2d(file_path):
-    df = pd.read_excel(file_path)
-    plt.figure(figsize=(8, 6))
-    plt.scatter(df['PC1'], df['PC2'], c=df['labels'], cmap='viridis', edgecolor='k', alpha=0.6)
-    plt.xlabel('PC1')
-    plt.ylabel('PC2')
-    plt.title('2D PCA')
-    plt.colorbar(label='Labels')
-    plt.savefig(file_path.replace('.xlsx', '.png'))
-    plt.close()
-
-def draw_pca_3d(file_path):
-    df = pd.read_excel(file_path)
-    fig = plt.figure(figsize=(8, 6))
-    ax = fig.add_subplot(111, projection='3d')
-    scatter = ax.scatter(df['PC1'], df['PC2'], df['PC3'], c=df['labels'], cmap='viridis', edgecolor='k', alpha=0.6)
-    ax.set_xlabel('PC1')
-    ax.set_ylabel('PC2')
-    ax.set_zlabel('PC3')
-    ax.set_title('3D PCA')
-    fig.colorbar(scatter, ax=ax, label='Labels')
-    plt.savefig(file_path.replace('.xlsx', '.png'))
-
-def draw_epoch_data(file_path):
-    df = pd.read_excel(file_path)
-    epochs = df['epoch']
-    train_loss = df['train_loss']
-    train_accuracy = df['train_accuracy'] * 100
-    test_accuracy = df['test_accuracy'] * 100
-    f1_score = df['f1_score']
-    precision = df['precision']
-    recall = df['recall']
-
-    fig, axs = plt.subplots(2, 3, figsize=(18, 12))
-
-    # Line plot: training loss
-    axs[0, 0].plot(epochs, train_loss, 'b-', label='Train Loss')
-    axs[0, 0].set_xlabel('Epoch')
-    axs[0, 0].set_ylabel('Loss')
-    axs[0, 0].set_title('Training Loss over Epochs')
-    axs[0, 0].legend()
-
-    # Line plot: train and test accuracy
-    axs[0, 1].plot(epochs, train_accuracy, 'g-', label='Train Accuracy')
-    axs[0, 1].plot(epochs, test_accuracy, 'r-', label='Test Accuracy')
-    axs[0, 1].set_xlabel('Epoch')
-    axs[0, 1].set_ylabel('Accuracy (%)')
-    axs[0, 1].set_title('Train and Test Accuracy over Epochs')
-    axs[0, 1].legend()
-
-    # Line plot: F1 Score
-    axs[0, 2].plot(epochs, f1_score, 'm-', label='F1 Score')
-    axs[0, 2].set_xlabel('Epoch')
-    axs[0, 2].set_ylabel('F1 Score')
-    axs[0, 2].set_title('F1 Score over Epochs')
-    axs[0, 2].legend()
-
-    # Line plot: Precision
-    axs[1, 0].plot(epochs, precision, 'c-', label='Precision')
-    axs[1, 0].set_xlabel('Epoch')
-    axs[1, 0].set_ylabel('Precision')
-    axs[1, 0].set_title('Precision over Epochs')
-    axs[1, 0].legend()
-
-    # Line plot: Recall
-    axs[1, 1].plot(epochs, recall, 'y-', label='Recall')
-    axs[1, 1].set_xlabel('Epoch')
-    axs[1, 1].set_ylabel('Recall')
-    axs[1, 1].set_title('Recall over Epochs')
-    axs[1, 1].legend()
-
-    # Blank or extra chart space (if needed)
-    axs[1, 2].axis('off')
-
-    plt.tight_layout()
-    plt.savefig(file_path.replace('.xlsx', '_epoch.png'))
-    plt.close()
-
-def draw_last_epoch_bar_chart(file_path):
-    df = pd.read_excel(file_path)
-    last_epoch_data = df.iloc[-1]
-
-    metrics = ['train_loss', 'train_accuracy', 'test_accuracy', 'f1_score', 'precision', 'recall']
-    values = [last_epoch_data[metric] for metric in metrics]
-    labels = ['Train Loss', 'Train Accuracy', 'Test Accuracy', 'F1 Score', 'Precision', 'Recall']
-
-    # Adjust the value scales
-    values[1] *= 100  # Train Accuracy
-    values[2] *= 100  # Test Accuracy
-
-    plt.figure(figsize=(10, 6))
-    plt.bar(labels, values, color=['blue', 'green', 'red', 'magenta', 'cyan', 'yellow'])
-    plt.xlabel('Metrics')
-    plt.ylabel('Values')
-    plt.title('Last Epoch Metrics')
-    plt.ylim(bottom=0)
-
-    # Add value labels
-    for i, value in enumerate(values):
-        plt.text(i, value + 0.01, f'{value:.2f}', ha='center')
-
-    plt.tight_layout()
-    plt.savefig(file_path.replace('.xlsx', '_last_epoch_bar.png'))
-    plt.close()
-
-def draw_and_save_cm(file_path):
-    # Read the Excel file
-    df_cm = pd.read_excel(file_path)
-
-    # Get the labels (assumes the DataFrame columns are the class labels)
-    labels = df_cm.columns[1:].tolist()
-
-    # Get the confusion matrix and normalized confusion matrix values
-    cm = df_cm.values[:, 1:]
-
-    # Create a figure and subplots
-    fig, axs = plt.subplots(1, 2, figsize=(12, 6))
-
-    # Plot the plain confusion matrix
-    axs[0].imshow(cm, interpolation='nearest', cmap='Blues')
-    axs[0].set_title('Confusion Matrix')
-    axs[0].set_xlabel('Predicted')
-    axs[0].set_ylabel('True')
-    axs[0].set_xticks(np.arange(len(labels)))
-    axs[0].set_yticks(np.arange(len(labels)))
-    axs[0].set_xticklabels(labels)
-    axs[0].set_yticklabels(labels)
-
-    # Add value labels
-    for i in range(len(labels)):
-        for j in range(len(labels)):
-            axs[0].text(j, i, f'{cm[i, j]}', ha='center', va='center')
-
-    # Adjust the layout and save the figure
-    plt.tight_layout()
-    plt.savefig(file_path.replace('.xlsx', '.png'))
-    plt.close()

@@ -1,28 +0,0 @@
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import StandardScaler, LabelEncoder
-
-def divSet(data, labels = None, test_size=0.2, random_state=None):
-
-    encoder = LabelEncoder()
-
-    # The last column is the label
-    X = data.iloc[:, :-1]
-    y = data.iloc[:, -1]
-
-    if labels:
-        labels = encoder.fit_transform(labels)
-    else:
-        encoder.fit(y)
-
-    # Split the dataset into training and test sets
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
-    # Standardize the features
-    scaler = StandardScaler()
-    X_train = scaler.fit_transform(X_train)
-    X_test = scaler.transform(X_test)
-
-    # Encode the labels
-    y_train = encoder.transform(y_train.values.reshape(-1, 1))
-    y_test = encoder.transform(y_test.values.reshape(-1, 1))
-
-    return X_train, X_test, y_train, y_test, encoder

@@ -15,7 +15,7 @@ class Qmlp(Qnn):
         super(Qmlp, self).__init__(data=data, labels=labels, test_size=test_size, random_state=random_state)

         input_size = self.X_train.shape[1]
-        num_classes = len(np.unique(self.y_train))
+        num_classes = len(labels) if labels is not None else int(np.max(self.y_train)) + 1
         self.layers = nn.ModuleList()

         # Connect the input layer to the first hidden layer

@@ -1,12 +1,12 @@
 import torch
 import torch.nn as nn
+import numpy as np
 import pandas as pd
 from sklearn.decomposition import PCA
 from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
 from torch.utils.data import DataLoader, TensorDataset

-from Qtorch import divSet as DS
-# from Qfunctions.saveToxlsx import save_to_xlsx as stx
+from Qfunctions.divSet import divSet as DS


 class Qnn(nn.Module):

@@ -45,11 +45,9 @@ class Qnn(nn.Module):

         # Convert the data to tensors
         X_train_tensor = torch.tensor(self.X_train, dtype=torch.float32)
-        self.y_train = self.LABEL_ENCODER.fit_transform(self.y_train)
         y_train_tensor = torch.tensor(self.y_train, dtype=torch.long)

         X_test_tensor = torch.tensor(self.X_test, dtype=torch.float32)
-        self.y_test = self.LABEL_ENCODER.transform(self.y_test)
         y_test_tensor = torch.tensor(self.y_test, dtype=torch.long)

         train_dataset = TensorDataset(X_train_tensor, y_train_tensor)

@@ -116,9 +114,9 @@ class Qnn(nn.Module):
                 all_prob.extend(prob.cpu().numpy())

             test_accuracy = correct_test / total_test
-            f1 = f1_score(all_labels, all_predicted, average='macro')
-            precision = precision_score(all_labels, all_predicted, average='macro')
-            recall = recall_score(all_labels, all_predicted, average='macro')
+            f1 = f1_score(all_labels, all_predicted, average='macro', zero_division=0)
+            precision = precision_score(all_labels, all_predicted, average='macro', zero_division=0)
+            recall = recall_score(all_labels, all_predicted, average='macro', zero_division=0)

             if (epoch + 1) % 10 == 0:
                 print('===============================================')

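For context on the UndefinedMetricWarning mentioned in the commit message, a standalone reproduction showing what zero_division=0 changes:

```python
from sklearn.metrics import precision_score

y_true = [0, 0, 1, 1]
y_pred = [0, 0, 0, 0]  # class 1 is never predicted, so its precision is 0/0

# Without zero_division, sklearn emits UndefinedMetricWarning and silently substitutes 0.
# With zero_division=0 the substitution is explicit and the warning goes away.
print(precision_score(y_true, y_pred, average='macro', zero_division=0))  # 0.25
```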
@@ -148,8 +146,10 @@ class Qnn(nn.Module):
                 break

         # cmn is the normalized confusion matrix
-        self.cm = confusion_matrix(all_labels, all_predicted)
-        self.cmn = confusion_matrix(all_labels, all_predicted, normalize='true')
+        # Keep matrix dimensions stable even when some classes do not appear in this split.
+        cm_labels = np.arange(len(self.labels)) if self.labels is not None else None
+        self.cm = confusion_matrix(all_labels, all_predicted, labels=cm_labels)
+        self.cmn = confusion_matrix(all_labels, all_predicted, labels=cm_labels, normalize='true')

         print(self.cm)
         return

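A standalone illustration of why labels= is now passed to confusion_matrix: without it the matrix shrinks to whichever classes happen to appear in the split.

```python
import numpy as np
from sklearn.metrics import confusion_matrix

y_true = [0, 0, 1, 1]  # class 2 never occurs in this split
y_pred = [0, 1, 1, 1]

print(confusion_matrix(y_true, y_pred).shape)                       # (2, 2) - shrinks to observed classes
print(confusion_matrix(y_true, y_pred, labels=np.arange(3)).shape)  # (3, 3) - stays aligned with all known labels
```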
@@ -1,3 +1,2 @@
 # Qtorch/__init__.py
-from .Functions.divSet import divSet
 from .Models import Qnn, Qmlp, Qcnn

@@ -143,8 +143,8 @@ Wood <-> Wood.xlsx or Wood/
 ```python
 from Qtorch.Models.Qmlp import Qmlp
 from Qfunctions.divSet import divSet
-from Qfunctions.loaData import load_data
-from Qfunctions.saveToxlsx import save_to_xlsx
+from Qfunctions.loadData import load_data
+from Qfunctions.saveToXlsx import save_to_xlsx

 projet_name = '20241009MaterialDiv'
 label_names = ['Acrlic', 'Ecoflex', 'PDMS', 'PLA', 'Wood']

main.py (12 changed lines)
@@ -1,7 +1,6 @@
 from Qtorch.Models.Qmlp import Qmlp
-from Qfunctions.divSet import divSet
-from Qfunctions.loaData import load_data
-from Qfunctions.saveToxlsx import save_to_xlsx as save_to_xlsx
+from Qfunctions.loadData import load_data
+from Qfunctions.saveToXlsx import save_to_xlsx as save_to_xlsx


 def main():
     # Name of the raw-data folder

@@ -11,13 +10,12 @@ def main():
     label_names = list(range(10))
     print(label_names)
     data = load_data(projet_name, label_names, isDir=False, fileClass='xlsx')
-    X_train, X_test, y_train, y_test, encoder = divSet(
-        data=data, labels=label_names, test_size= 0.3
-    )

     model = Qmlp(
-        X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test,
+        data=data,
+        labels=label_names,
         hidden_layers = [128, 256, 128],
+        test_size=0.3,
         dropout_rate=0
     )
     # model = QCNN(