update some

newbie 2024-10-19 11:07:59 +08:00
parent ee552e7d1c
commit a9f9059735
7 changed files with 132 additions and 73 deletions

.gitignore

@@ -1,2 +1,4 @@
 Static
 Result
+__pycache__
+SVM

Qfunctions/loaData.py

@@ -43,7 +43,20 @@ def load_data(folder, labelNames, isDir):
     return data

 def load_from_folder(folder, labelNames):
-    pass
+    all_features = []
+    for labelName in labelNames:
+        subfolder = os.path.join(folder, labelName)
+        if os.path.exists(subfolder) and os.path.isdir(subfolder):
+            fileNames = [f for f in os.listdir(subfolder) if f.endswith('.xlsx')]
+            max_row_length = get_max_row_len(subfolder, fileNames)
+            features = []
+            for fileName in fileNames:
+                file_path = os.path.join(subfolder, fileName)
+                features.append(load_xlsx(file_path, labelName, max_row_length, 'zero'))
+            if features:
+                all_features.append(pd.concat(features, ignore_index=True))
+    # Merge the data for all labels
+    return pd.concat(all_features, ignore_index=True)

 def load_from_file(folder, labelNames):
     fileNames = [labelName + ".xlsx" for labelName in labelNames]
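Note: the new load_from_folder relies on a get_max_row_len helper that is not shown in this hunk. A minimal sketch of what it presumably does, assuming it scans the .xlsx files for the longest recording so shorter ones can be padded by load_xlsx (the name and signature come from the call site; the body is a guess):

import os
import pandas as pd

def get_max_row_len(subfolder, fileNames):
    # Assumed behavior: return the largest row count across the given .xlsx
    # files, so every file can be padded to a common length downstream.
    max_len = 0
    for fileName in fileNames:
        df = pd.read_excel(os.path.join(subfolder, fileName))
        max_len = max(max_len, len(df))
    return max_len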
@@ -61,11 +74,16 @@ def load_xlsx(fileName, labelName, max_row_length = 1000, fill_rule = None):
     # Extract the even-numbered columns
     features = df.iloc[0:, 1::2]
-    # Compute the rate of change
-    first_value = features.iloc[0, :]  # get the first row of data
-    features_pct_change = (features - first_value) / first_value
-    features = features_pct_change
+    # Copy the features DataFrame
+    features_copy = features.copy()
+    # Use pd.concat to append the copy to the original DataFrame
+    features = pd.concat([features, features_copy], ignore_index=True, axis=1)
+    # Compute the rate of change
+    # first_value = features.iloc[0, :]  # get the first row of data
+    # features_pct_change = (features - first_value) / first_value
+    # features = features_pct_change
     features.dropna(inplace=True)
     features.reset_index(drop=True, inplace=True)
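The rate-of-change normalization is now commented out; instead the feature block is duplicated side by side. With axis=1 and ignore_index=True, pd.concat also discards the original column labels and renumbers them 0..2n-1, which a quick standalone example makes concrete:

import pandas as pd

df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
doubled = pd.concat([df, df.copy()], ignore_index=True, axis=1)
print(doubled.shape)          # (2, 4): same rows, columns duplicated
print(list(doubled.columns))  # [0, 1, 2, 3]: original labels discarded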


@@ -83,7 +83,7 @@ class Qmlp(nn.Module):
         model = self.to(DEVICE)
         criterion = nn.CrossEntropyLoss()
-        optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
+        optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)
         scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
         best_test_accuracy = 0
         patience = 100
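This hunk lowers Adam's learning rate from 1e-3 to 1e-4 while keeping the ReduceLROnPlateau scheduler. For context, that scheduler only reacts when it is explicitly fed the monitored metric each epoch; a minimal runnable sketch of the usual wiring (dummy data and model, not the repo's actual Qmlp.fit loop):

import torch
import torch.nn as nn

# Dummy data and model so the sketch runs standalone.
X = torch.randn(64, 10)
y = torch.randint(0, 3, (64,))
model = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 3))

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)

best_loss, patience, bad_epochs = float('inf'), 100, 0
for epoch in range(300):
    optimizer.zero_grad()
    loss = criterion(model(X), y)
    loss.backward()
    optimizer.step()
    scheduler.step(loss.item())  # ReduceLROnPlateau must be given the metric explicitly
    if loss.item() < best_loss:
        best_loss, bad_epochs = loss.item(), 0
    else:
        bad_epochs += 1
    if bad_epochs >= patience:   # simple early stopping, mirroring patience = 100 above
        break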

main.py

@@ -4,14 +4,12 @@ from Qfunctions.divSet import divSet
 from Qfunctions.loaData import load_data as dLoader
 from sklearn.decomposition import PCA
-import torch

 def main():
-    projet_name = '20241005Sound'
-    label_names =["Accuracy", "Compress", "Distance", "Loss", "Metal", "Python"]
-    # data = dLoader(projet_name, label_names, isDir=False)
+    projet_name = '20241009MaterialDiv'
+    label_names =["Acrylic", "Ecoflex", "PDMS", "PLA", "Wood"]
+    data = dLoader(projet_name, label_names, isDir=True)
     X_train, X_test, y_train, y_test, encoder = divSet(
-        data=data, labels=label_names, test_size= 0.2
+        data=data, labels=label_names, test_size= 0.3
     )
     print(y_train)
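divSet itself is not part of this commit; judging from the call site it encodes the string labels and splits the data. A hypothetical equivalent built on sklearn (the 'label' column name and the exact behavior are assumptions, not confirmed by the diff):

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

def div_set(data, labels, test_size):
    # Hypothetical stand-in for Qfunctions.divSet.divSet: encode string labels
    # as integers, separate them from the features, then split train/test.
    encoder = LabelEncoder().fit(labels)
    y = encoder.transform(data['label'])  # assumes the label column is named 'label'
    X = data.drop(columns=['label'])
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    return X_train, X_test, y_train, y_test, encoder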
@@ -27,27 +25,123 @@ def main():
     df_pca3d = pd.DataFrame(data=principalComponents, columns=['PC1', 'PC2', 'PC3'])
     df_pca3d['labels'] = y_train

-    # Save as a CSV file
+    # Save as an xlsx file
     import os
     folder = os.path.join("./Result", projet_name)
+    if not os.path.exists(folder):
+        os.makedirs(folder)
     df_pca2d.to_excel(os.path.join(folder, 'pca_2d_points_with_labels.xlsx'), index=False)
     df_pca3d.to_excel(os.path.join(folder, 'pca_3d_points_with_labels.xlsx'), index=False)

-    """
+    clf = SVC(kernel='rbf', C=1.0)
+    """
+    from sklearn.model_selection import RandomizedSearchCV
+    from scipy.stats import uniform
+
+    # Define the parameter distribution
+    param_dist = {'C': uniform(0.1, 100)}
+
+    # Randomized search
+    random_search = RandomizedSearchCV(SVC(kernel='rbf'), param_distributions=param_dist,
+                                       n_iter=20, cv=5, scoring='accuracy')
+    random_search.fit(X_train, y_train)
+
+    # Get the best parameter and model
+    best_C = random_search.best_params_['C']
+    best_model = random_search.best_estimator_
+
+    # Evaluate
+    y_pred = best_model.predict(X_test)
+    from sklearn.metrics import accuracy_score, confusion_matrix
+    accuracy = accuracy_score(y_test, y_pred)
+    print(f"Best C: {best_C}")
+    print(f"Best accuracy: {accuracy}" )"""
+    """
+    from sklearn import svm
+    from sklearn.model_selection import train_test_split, learning_curve, cross_val_score
+    import numpy as np
+    import matplotlib.pyplot as plt
+    import seaborn as sns
+
+    # Run cross-validation
+    cv_scores = cross_val_score(clf, X_train, y_train, cv=5)
+    print(f"Cross-validation scores: {cv_scores}")
+    print(f"Mean CV score: {cv_scores.mean():.3f} (+/- {cv_scores.std() * 2:.3f})")
+
+    # Compute the learning curve
+    train_sizes, train_scores, test_scores = learning_curve(
+        clf, X_train, y_train, cv=5, n_jobs=-1,
+        train_sizes=np.linspace(.1, 1.0, 5))
+    train_scores_mean = np.mean(train_scores, axis=1)
+    train_scores_std = np.std(train_scores, axis=1)
+    test_scores_mean = np.mean(test_scores, axis=1)
+    test_scores_std = np.std(test_scores, axis=1)
+
+    # Plot the learning curve
+    plt.figure(figsize=(10, 6))
+    plt.title("Learning Curve")
+    plt.xlabel("Training examples")
+    plt.ylabel("Score")
+    plt.grid()
+    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
+                     train_scores_mean + train_scores_std, alpha=0.1, color="r")
+    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
+                     test_scores_mean + test_scores_std, alpha=0.1, color="g")
+    plt.plot(train_sizes, train_scores_mean, 'o-', color="r", label="Training score")
+    plt.plot(train_sizes, test_scores_mean, 'o-', color="g", label="Cross-validation score")
+    plt.legend(loc="best")
+    plt.show()
+
+    # Train the model on the full training data
+    clf.fit(X_train, y_train)
+    # Predict
+    y_pred = clf.predict(X_test)
+    # Compute the accuracy
+    accuracy = accuracy_score(y_test, y_pred)
+    print(f"Test Accuracy: {accuracy:.3f}")
+
+    # Compute the normalized confusion matrix
+    cm = confusion_matrix(y_test, y_pred, normalize='true')
+    # Plot the confusion matrix
+    plt.figure(figsize=(10,7))
+    sns.heatmap(cm, annot=True, fmt='.2f', cmap='Blues')
+    plt.title('Normalized Confusion Matrix')
+    plt.ylabel('True label')
+    plt.xlabel('Predicted label')
+    plt.show() """
+    """
+    model.fit(X_train, y_train)
+    y_pred = model.predict(X_test)
+    from sklearn.metrics import accuracy_score, confusion_matrix
+    accuracy = accuracy_score(y_test, y_pred)
+    print(f"Accuracy: {accuracy}")
+    # Compute the normalized confusion matrix
+    cm = confusion_matrix(y_test, y_pred, normalize='true')
+    print(cm) """

     model = Qmlp(
         X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test,
-        hidden_layers=[64, 64],
+        hidden_layers=[128, 128],
         dropout_rate=0
     )
-    model.fit(100)
+    model.fit(300)

     cm = model.get_cm()
     epoch_data = model.get_epoch_data()

     from Qfunctions.saveToxlsx import save_to_xlsx as stx
-    stx(project_name=projet_name, file_name="cm", data=cm)
+    stx(project_name=projet_name, file_name="cm", data=cm )
     stx(project_name=projet_name, file_name="acc_and_loss", data=epoch_data)
     print("Done")

 if __name__ == '__main__':
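One small note on the new directory guard: checking os.path.exists before os.makedirs is racy if two runs start at once; os.makedirs with exist_ok=True is the usual one-liner:

import os

folder = os.path.join("./Result", "20241009MaterialDiv")
os.makedirs(folder, exist_ok=True)  # safe whether or not the folder already exists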

@@ -1,55 +0,0 @@
-import pandas as pd
-import torch
-from torch.utils.data import DataLoader, TensorDataset
-
-# Read the Excel file
-df = pd.read_excel('loss-metal-compress.xlsx')
-
-# Take rows 300-600 of the second column for the model
-data = df.iloc[300:600, 1].values
-
-# Convert the data to a tensor
-data_tensor = torch.tensor(data, dtype=torch.float32).unsqueeze(0)  # add a batch dimension
-
-# Number of zeros that need to be padded
-padding_size = 371 - data_tensor.size(1)
-
-# Pad with zeros if the required padding is greater than 0
-if padding_size > 0:
-    # Create a zero tensor of shape [1, padding_size]
-    padding_tensor = torch.zeros(1, padding_size, dtype=torch.float32)
-    # Concatenate the original data with the zero tensor
-    data_tensor_padded = torch.cat((data_tensor, padding_tensor), dim=1)
-else:
-    data_tensor_padded = data_tensor
-
-# Wrap in a TensorDataset and DataLoader
-dataset = TensorDataset(data_tensor_padded)
-dataloader = DataLoader(dataset, batch_size=1, shuffle=False)
-
-# Pick the device
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-# device = 'cpu'
-print(f"Using device: {device}")
-
-# Load the model
-model = torch.load('Sound.pth', map_location=device)  # make sure the model is loaded onto the right device
-model.to(device)  # make sure the model is on the right device
-model.eval()  # switch to evaluation mode
-
-# Run predictions
-predictions = []
-with torch.no_grad():
-    for batch in dataloader:
-        inputs = batch[0].to(device)  # move the inputs to the same device
-        outputs = model(inputs)
-        _, predicted = torch.max(outputs, 1)
-        predictions.extend(predicted.cpu().numpy())  # move predictions back to the CPU as a numpy array
-
-# Print the predictions
-print(predictions)
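A side note on this deleted script: torch.load on a whole pickled model (Sound.pth) only works when the class that defined it is importable at load time. Saving and restoring a state_dict is the more portable pattern; a runnable sketch with a stand-in architecture (the deleted script's real model class is not shown; the 371-feature input mirrors the padding above):

import torch
import torch.nn as nn

# Stand-in architecture; replace with the project's actual model class.
model = nn.Sequential(nn.Linear(371, 64), nn.ReLU(), nn.Linear(64, 6))

# Save only the weights rather than the pickled module...
torch.save(model.state_dict(), 'Sound_weights.pth')

# ...then rebuild the same architecture and restore the weights for inference.
restored = nn.Sequential(nn.Linear(371, 64), nn.ReLU(), nn.Linear(64, 6))
restored.load_state_dict(torch.load('Sound_weights.pth', map_location='cpu'))
restored.eval()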