diff --git a/.gitignore b/.gitignore index 2bb627b..992c925 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ Static -Result \ No newline at end of file +Result +__pycache__ +SVM \ No newline at end of file diff --git a/Qfunctions/__pycache__/loaData.cpython-312.pyc b/Qfunctions/__pycache__/loaData.cpython-312.pyc index da10d4c..a625ceb 100644 Binary files a/Qfunctions/__pycache__/loaData.cpython-312.pyc and b/Qfunctions/__pycache__/loaData.cpython-312.pyc differ diff --git a/Qfunctions/loaData.py b/Qfunctions/loaData.py index 0646adf..397354c 100644 --- a/Qfunctions/loaData.py +++ b/Qfunctions/loaData.py @@ -43,7 +43,20 @@ def load_data(folder, labelNames, isDir): return data def load_from_folder(folder, labelNames): - pass + all_features = [] + for labelName in labelNames: + subfolder = os.path.join(folder, labelName) + if os.path.exists(subfolder) and os.path.isdir(subfolder): + fileNames = [f for f in os.listdir(subfolder) if f.endswith('.xlsx')] + max_row_length = get_max_row_len(subfolder, fileNames) + features = [] + for fileName in fileNames: + file_path = os.path.join(subfolder, fileName) + features.append(load_xlsx(file_path, labelName, max_row_length, 'zero')) + if features: + all_features.append(pd.concat(features, ignore_index=True)) + # 将所有标签的数据合并 + return pd.concat(all_features, ignore_index=True) def load_from_file(folder, labelNames): fileNames = [labelName + ".xlsx" for labelName in labelNames] @@ -61,11 +74,16 @@ def load_xlsx(fileName, labelName, max_row_length = 1000, fill_rule = None): # 提取偶数列 features = df.iloc[0:, 1::2] - # 计算变化率 - first_value = features.iloc[0, :] # 获取第一行的数据 - features_pct_change = (features - first_value) / first_value + # 复制 features DataFrame + features_copy = features.copy() + # 使用 pd.concat 来追加副本到原始 DataFrame + features = pd.concat([features, features_copy], ignore_index=True, axis=1) - features = features_pct_change + # 计算变化率 + # first_value = features.iloc[0, :] # 获取第一行的数据 + # features_pct_change = (features - first_value) / first_value + + # features = features_pct_change features.dropna(inplace=True) features.reset_index(drop=True, inplace=True) diff --git a/Qtorch/Models/Qmlp.py b/Qtorch/Models/Qmlp.py index 841bb90..aeefd6e 100644 --- a/Qtorch/Models/Qmlp.py +++ b/Qtorch/Models/Qmlp.py @@ -83,7 +83,7 @@ class Qmlp(nn.Module): model = self.to(DEVICE) criterion = nn.CrossEntropyLoss() - optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5) + optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10) best_test_accuracy = 0 patience = 100 diff --git a/Qtorch/Models/__pycache__/Qmlp.cpython-312.pyc b/Qtorch/Models/__pycache__/Qmlp.cpython-312.pyc index 5d11c6f..0ec563b 100644 Binary files a/Qtorch/Models/__pycache__/Qmlp.cpython-312.pyc and b/Qtorch/Models/__pycache__/Qmlp.cpython-312.pyc differ diff --git a/main.py b/main.py index 53e68e4..c174de5 100644 --- a/main.py +++ b/main.py @@ -4,14 +4,12 @@ from Qfunctions.divSet import divSet from Qfunctions.loaData import load_data as dLoader from sklearn.decomposition import PCA -import torch - def main(): - projet_name = '20241005Sound' - label_names =["Accuracy", "Compress", "Distance", "Loss", "Metal", "Python"] - # data = dLoader(projet_name, label_names, isDir=False) + projet_name = '20241009MaterialDiv' + label_names =["Acrylic", "Ecoflex", "PDMS", "PLA", "Wood"] + data = dLoader(projet_name, label_names, isDir=True) X_train, X_test, y_train, y_test, encoder = divSet( - data=data, labels=label_names, test_size= 0.2 + data=data, labels=label_names, test_size= 0.3 ) print(y_train) @@ -27,27 +25,123 @@ def main(): df_pca3d = pd.DataFrame(data=principalComponents, columns=['PC1', 'PC2', 'PC3']) df_pca3d['labels'] = y_train - # 保存为CSV文件 + # 保存为xlsx文件 import os folder = os.path.join("./Result", projet_name) + if not os.path.exists(folder): + os.makedirs(folder) df_pca2d.to_excel(os.path.join(folder, 'pca_2d_points_with_labels.xlsx'), index=False) df_pca3d.to_excel(os.path.join(folder, 'pca_3d_points_with_labels.xlsx'), index=False) + """ clf = SVC(kernel='rbf', C=1.0) + from sklearn.model_selection import RandomizedSearchCV + from scipy.stats import uniform + + # 定义参数分布 + param_dist = {'C': uniform(0.1, 100)} + + # 随机搜索 + random_search = RandomizedSearchCV(SVC(kernel='rbf'), param_distributions=param_dist, + n_iter=20, cv=5, scoring='accuracy') + random_search.fit(X_train, y_train) + + # 获取最佳参数和模型 + best_C = random_search.best_params_['C'] + best_model = random_search.best_estimator_ + + # 评估 + y_pred = best_model.predict(X_test) + from sklearn.metrics import accuracy_score, confusion_matrix + accuracy = accuracy_score(y_test, y_pred) + + print(f"Best C: {best_C}") + print(f"Best accuracy: {accuracy}" )""" + + """ + from sklearn import svm + from sklearn.model_selection import train_test_split, learning_curve, cross_val_score + import numpy as np + import matplotlib.pyplot as plt + import seaborn as sns + + # 进行交叉验证 + cv_scores = cross_val_score(clf, X_train, y_train, cv=5) + print(f"Cross-validation scores: {cv_scores}") + print(f"Mean CV score: {cv_scores.mean():.3f} (+/- {cv_scores.std() * 2:.3f})") + + + # 计算学习曲线 + train_sizes, train_scores, test_scores = learning_curve( + clf, X_train, y_train, cv=5, n_jobs=-1, + train_sizes=np.linspace(.1, 1.0, 5)) + + train_scores_mean = np.mean(train_scores, axis=1) + train_scores_std = np.std(train_scores, axis=1) + test_scores_mean = np.mean(test_scores, axis=1) + test_scores_std = np.std(test_scores, axis=1) + + # 绘制学习曲线 + plt.figure(figsize=(10, 6)) + plt.title("Learning Curve") + plt.xlabel("Training examples") + plt.ylabel("Score") + plt.grid() + plt.fill_between(train_sizes, train_scores_mean - train_scores_std, + train_scores_mean + train_scores_std, alpha=0.1, color="r") + plt.fill_between(train_sizes, test_scores_mean - test_scores_std, + test_scores_mean + test_scores_std, alpha=0.1, color="g") + plt.plot(train_sizes, train_scores_mean, 'o-', color="r", label="Training score") + plt.plot(train_sizes, test_scores_mean, 'o-', color="g", label="Cross-validation score") + plt.legend(loc="best") + plt.show() + + # 在全部训练数据上训练模型 + clf.fit(X_train, y_train) + + # 预测 + y_pred = clf.predict(X_test) + + # 计算准确率 + accuracy = accuracy_score(y_test, y_pred) + print(f"Test Accuracy: {accuracy:.3f}") + + # 计算归一化的混淆矩阵 + cm = confusion_matrix(y_test, y_pred, normalize='true') + + # 绘制混淆矩阵 + plt.figure(figsize=(10,7)) + sns.heatmap(cm, annot=True, fmt='.2f', cmap='Blues') + plt.title('Normalized Confusion Matrix') + plt.ylabel('True label') + plt.xlabel('Predicted label') + plt.show() """ + + """ + model.fit(X_train, y_train) + y_pred = model.predict(X_test) + from sklearn.metrics import accuracy_score, confusion_matrix + accuracy = accuracy_score(y_test, y_pred) + print(f"Accuracy: {accuracy}") + + # 计算归一化的混淆矩阵 + cm = confusion_matrix(y_test, y_pred, normalize='true') + print(cm) """ model = Qmlp( X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test, - hidden_layers=[64, 64], + hidden_layers=[128, 128], dropout_rate=0 ) - model.fit(100) + model.fit(300) cm = model.get_cm() epoch_data = model.get_epoch_data() - + from Qfunctions.saveToxlsx import save_to_xlsx as stx - stx(project_name=projet_name, file_name="cm", data=cm) + stx(project_name=projet_name, file_name="cm", data=cm ) stx(project_name=projet_name, file_name="acc_and_loss", data=epoch_data) + print("Done") if __name__ == '__main__': diff --git a/test2.py b/test2.py deleted file mode 100644 index b5c13f4..0000000 --- a/test2.py +++ /dev/null @@ -1,55 +0,0 @@ -import pandas as pd -import torch -from torch.utils.data import DataLoader, TensorDataset - -# 读取Excel文件 -df = pd.read_excel('loss-metal-compress.xlsx') - -# 假设你的模型需要的数据是前300行 -data = df.iloc[300:600, 1].values - - -# 将数据转换为Tensor -data_tensor = torch.tensor(data, dtype=torch.float32).unsqueeze(0) # 增加一个批次维度 - -# 需要填充的0的数量 -padding_size = 371 - data_tensor.size(1) - -# 如果需要填充的0的数量大于0,则进行填充 -if padding_size > 0: - # 创建一个形状为[1, padding_size]的0张量 - padding_tensor = torch.zeros(1, padding_size, dtype=torch.float32) - # 将原始数据和0张量拼接起来 - data_tensor_padded = torch.cat((data_tensor, padding_tensor), dim=1) -else: - data_tensor_padded = data_tensor - -# 包装成TensorDataset和DataLoader -dataset = TensorDataset(data_tensor_padded) -dataloader = DataLoader(dataset, batch_size=1, shuffle=False) - -# 确定设备 -device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -# device = 'cpu' -print(f"Using device: {device}") - -# 加载你的模型 -model = torch.load('Sound.pth', map_location=device) # 确保模型加载到正确的设备 -model.to(device) # 再次确保模型在正确的设备上 -model.eval() # 设置为评估模式 - -# 进行预测 -predictions = [] -with torch.no_grad(): - for batch in dataloader: - inputs = batch[0].to(device) # 将输入数据移动到相同的设备 - outputs = model(inputs) - _, predicted = torch.max(outputs, 1) - predictions.extend(predicted.cpu().numpy()) # 将预测结果移动回CPU并转换为numpy数组 - -# 打印预测结果 -print(predictions) - - - -