# Material-classification training script (20241009MaterialDiv).
# Originally: from Qtorch.Functions import dLoader (now provided by load_data below)
|
|
from Qtorch.Models.Qmlp import Qmlp
|
|
from Qfunctions.divSet import divSet
|
|
from Qfunctions.loaData import load_data as dLoader
|
|
from sklearn.decomposition import PCA
|
|
|
|
def main():
    """Train a material-classification MLP and export PCA projections.

    Pipeline:
      1. Load the '20241009MaterialDiv' dataset (one directory per label).
      2. Split into train/test sets.
      3. Save 2-D and 3-D PCA projections of the training data to Excel
         for external plotting.
      4. Train a Qmlp classifier and save its confusion matrix and
         per-epoch accuracy/loss curves to Excel.
    """
    import os

    import pandas as pd

    # NOTE(review): fixed the 'projet_name' misspelling; value unchanged.
    project_name = '20241009MaterialDiv'
    label_names = ["Acrylic", "Ecoflex", "PDMS", "PLA", "Wood"]

    data = dLoader(project_name, label_names, isDir=True)

    # divSet also returns the fitted label encoder; unused here but kept
    # so the 5-tuple unpacking matches divSet's contract.
    X_train, X_test, y_train, y_test, encoder = divSet(
        data=data, labels=label_names, test_size=0.3
    )
    print(y_train)

    def _pca_frame(n_components):
        # Project X_train onto its first `n_components` principal axes
        # and attach the training labels as a trailing 'labels' column.
        columns = ['PC1', 'PC2', 'PC3'][:n_components]
        points = PCA(n_components=n_components).fit_transform(X_train)
        frame = pd.DataFrame(data=points, columns=columns)
        frame['labels'] = y_train
        return frame

    # Persist the PCA projections as .xlsx files under ./Result/<project>.
    folder = os.path.join("./Result", project_name)
    os.makedirs(folder, exist_ok=True)  # idempotent; avoids exists/create race
    _pca_frame(2).to_excel(
        os.path.join(folder, 'pca_2d_points_with_labels.xlsx'), index=False
    )
    _pca_frame(3).to_excel(
        os.path.join(folder, 'pca_3d_points_with_labels.xlsx'), index=False
    )

    # Train the MLP classifier; hidden-layer layout, zero dropout and the
    # 300-epoch budget follow the original experiment setup.
    model = Qmlp(
        X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test,
        hidden_layers=[128, 128],
        dropout_rate=0,
    )
    model.fit(300)

    # Export the confusion matrix and per-epoch metrics.
    from Qfunctions.saveToxlsx import save_to_xlsx as stx
    stx(project_name=project_name, file_name="cm", data=model.get_cm())
    stx(project_name=project_name, file_name="acc_and_loss",
        data=model.get_epoch_data())

    print("Done")
|
|
|
|
# Script entry point: run the full training/export pipeline only when this
# file is executed directly (not when imported as a module).
if __name__ == '__main__':
    main()
|
|
|
|
|
|
|
|
|
|
# from sklearn.model_selection import train_test_split
|
|
# from sklearn.preprocessing import StandardScaler
|
|
# from sklearn.svm import SVC
|
|
# from sklearn.model_selection import GridSearchCV
|
|
# from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
|
|
# import pandas as pd
|
|
|
|
# if __name__ == '__main__':
|
|
|
|
# project_name = '20240829Letters'
|
|
# labels = None
|
|
|
|
# data = ld(project_name, labels)
|
|
|
|
|
|
|
|
|
|
# svm = SVM(
|
|
# data=data,
|
|
# labels=labels
|
|
# )
|
|
|
|
# svm.fit()
|
|
|
|
# X, y = data.iloc[:, :-1], data.iloc[:, -1]
|
|
|
|
# # 分割数据
|
|
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=None)
|
|
|
|
# # 标准化数据
|
|
# scaler = StandardScaler()
|
|
# X_train_scaled = scaler.fit_transform(X_train)
|
|
# X_test_scaled = scaler.transform(X_test)
|
|
|
|
# # 创建 SVM 分类器
|
|
# svm = SVC(kernel='rbf', random_state=42)
|
|
|
|
# # 定义参数网格
|
|
# param_grid = {
|
|
# 'C': [0.1, 1, 10, 100],
|
|
# 'gamma': ['scale', 'auto', 0.1, 1, 10]
|
|
# }
|
|
|
|
# # 使用网格搜索进行超参数调优
|
|
# grid_search = GridSearchCV(svm, param_grid, cv=5, n_jobs=-1, verbose=2)
|
|
# grid_search.fit(X_train_scaled, y_train)
|
|
|
|
# # 打印最佳参数
|
|
# print("Best parameters:", grid_search.best_params_)
|
|
|
|
# # 使用最佳参数的模型
|
|
# best_svm = grid_search.best_estimator_
|
|
|
|
# # 计算训练集和测试集准确率
|
|
# y_train_pred = best_svm.predict(X_train_scaled)
|
|
# train_acc = accuracy_score(y_train, y_train_pred)
|
|
|
|
# y_test_pred = best_svm.predict(X_test_scaled)
|
|
# test_acc = accuracy_score(y_test, y_test_pred)
|
|
|
|
# # 在测试集上进行预测
|
|
# y_pred = best_svm.predict(X_test_scaled)
|
|
|
|
# # 计算准确率
|
|
# accuracy = accuracy_score(y_test, y_pred)
|
|
# print(f"Accuracy: {accuracy}")
|
|
|
|
# # 打印详细的分类报告
|
|
# print(classification_report(y_test, y_pred))
|
|
# # 计算并可视化混淆矩阵
|
|
# cm = confusion_matrix(y_test, y_test_pred, normalize='true')
|
|
|
|
# print(cm)
|
|
# # model = QSVM(
|
|
# # data=data,
|
|
# # labels=labels
|
|
# # )
|
|
|
|
# # model.fit(300)
|
|
# # model.save(project_name)
|
|
|
|
|
|
# # 创建一个 Excel 写入器
|
|
# # 将分类报告转换为DataFrame
|
|
# # 获取分类报告
|
|
|
|
# report = classification_report(y_test, y_test_pred, output_dict=True)
|
|
|
|
# df_report = pd.DataFrame(report).transpose()
|
|
# with pd.ExcelWriter(f'./Result/{project_name}/svm_results.xlsx') as writer:
|
|
# from sklearn.decomposition import PCA
|
|
# pca = PCA()
|
|
# X_pca = pca.fit_transform(X)
|
|
# # 创建 2D PCA 坐标的 DataFrame
|
|
# df_pca_2d = pd.DataFrame(data = X_pca[:, :2], columns = ['First Principal Component', 'Second Principal Component'])
|
|
# df_pca_2d['Label'] = y
|
|
# # 创建 3D PCA 坐标的 DataFrame
|
|
# df_pca_3d = pd.DataFrame(data = X_pca[:, :3], columns = ['First Principal Component', 'Second Principal Component', 'Third Principal Component'])
|
|
# df_pca_3d['Label'] = y
|
|
|
|
# # 将 2D PCA 坐标写入 Excel
|
|
# df_pca_2d.to_excel(writer, sheet_name='PCA 2D Coordinates', index=False)
|
|
# df_pca_3d.to_excel(writer, sheet_name='PCA 3D Coordinates', index=False)
|
|
|
|
|
|
# # 将分类报告写入Excel
|
|
# df_report.to_excel(writer, sheet_name='Classification Report')
|
|
|
|
# # 将最佳参数写入Excel
|
|
# pd.DataFrame([grid_search.best_params_]).to_excel(writer, sheet_name='Best Parameters')
|
|
|
|
# # 如果你想保存混淆矩阵
|
|
# from sklearn.metrics import confusion_matrix
|
|
# # 创建混淆矩阵并添加标签
|
|
# cm = confusion_matrix(y_test, y_test_pred, normalize='true')
|
|
# df_cm = pd.DataFrame(cm, index=labels, columns=labels)
|
|
# df_cm.index.name = 'True'
|
|
# df_cm.columns.name = 'Predicted'
|
|
|
|
# # 将混淆矩阵写入Excel
|
|
# df_cm.to_excel(writer, sheet_name='Confusion Matrix')
|
|
|
|
# # 如果你想保存训练集和测试集的准确率
|
|
# train_accuracy = best_svm.score(X_train_scaled, y_train)
|
|
# test_accuracy = best_svm.score(X_test_scaled, y_test)
|
|
# pd.DataFrame({
|
|
# 'Train Accuracy': [train_accuracy],
|
|
# 'Test Accuracy': [test_accuracy]
|
|
# }).to_excel(writer, sheet_name='Accuracy')
|
|
|
|
# print("Results have been saved to 'svm_results.xlsx'") |