update some

commit a9f9059735 (parent ee552e7d1c)
.gitignore (vendored): 2 changes

@@ -1,2 +1,4 @@
 Static
 Result
+__pycache__
+SVM
Binary file not shown.
@@ -43,7 +43,20 @@ def load_data(folder, labelNames, isDir):
     return data


 def load_from_folder(folder, labelNames):
-    pass
+    all_features = []
+    for labelName in labelNames:
+        subfolder = os.path.join(folder, labelName)
+        if os.path.exists(subfolder) and os.path.isdir(subfolder):
+            fileNames = [f for f in os.listdir(subfolder) if f.endswith('.xlsx')]
+            max_row_length = get_max_row_len(subfolder, fileNames)
+            features = []
+            for fileName in fileNames:
+                file_path = os.path.join(subfolder, fileName)
+                features.append(load_xlsx(file_path, labelName, max_row_length, 'zero'))
+            if features:
+                all_features.append(pd.concat(features, ignore_index=True))
+    # merge the data from all labels
+    return pd.concat(all_features, ignore_index=True)


 def load_from_file(folder, labelNames):
     fileNames = [labelName + ".xlsx" for labelName in labelNames]
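For orientation, here is a minimal sketch of how load_from_folder is presumably driven, assuming the on-disk layout implied by the loop above (one subfolder per label, each holding .xlsx recordings); the folder and label names are borrowed from main.py below and may differ from the real call site:

    # Hypothetical call, assuming ./20241009MaterialDiv/<label>/*.xlsx on disk.
    # load_from_folder stacks one labelled DataFrame per file into a single frame.
    labels = ["Acrylic", "Ecoflex", "PDMS", "PLA", "Wood"]
    data = load_from_folder("20241009MaterialDiv", labels)
    print(data.shape)  # rows from every label concatenated together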
@@ -61,11 +74,16 @@ def load_xlsx(fileName, labelName, max_row_length = 1000, fill_rule = None):

     # extract the even-numbered columns
     features = df.iloc[0:, 1::2]
     # compute the rate of change
     first_value = features.iloc[0, :]  # data from the first row
     features_pct_change = (features - first_value) / first_value
+    # copy the features DataFrame
+    features_copy = features.copy()
+    # use pd.concat to append the copy to the original DataFrame
+    features = pd.concat([features, features_copy], ignore_index=True, axis=1)

     features = features_pct_change
+    # compute the rate of change
+    # first_value = features.iloc[0, :]  # data from the first row
+    # features_pct_change = (features - first_value) / first_value
+
+    # features = features_pct_change

     features.dropna(inplace=True)
     features.reset_index(drop=True, inplace=True)
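Note that the concatenated copy built just above is immediately overwritten by features = features_pct_change, so only the rate-of-change values survive. As a quick sanity check on that rate-of-change step (made-up numbers, not project data): each column is rescaled relative to its own first row, so the first row always becomes zero:

    import pandas as pd

    features = pd.DataFrame({"a": [2.0, 3.0, 4.0], "b": [10.0, 5.0, 20.0]})
    first_value = features.iloc[0, :]          # first row of each column
    print((features - first_value) / first_value)
    #      a    b
    # 0  0.0  0.0
    # 1  0.5 -0.5
    # 2  1.0  1.0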
@ -83,7 +83,7 @@ class Qmlp(nn.Module):
|
||||
model = self.to(DEVICE)
|
||||
|
||||
criterion = nn.CrossEntropyLoss()
|
||||
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
|
||||
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)
|
||||
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
|
||||
best_test_accuracy = 0
|
||||
patience = 100
|
||||
|
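The fit body is not shown in this hunk; given the optimizer, the ReduceLROnPlateau scheduler, best_test_accuracy, and patience = 100 above, the loop presumably follows the standard plateau/early-stopping pattern sketched here (train_one_epoch and evaluate are hypothetical stand-ins, not the real Qmlp API):

    epochs_without_improvement = 0
    for epoch in range(max_epochs):
        train_loss = train_one_epoch(model, optimizer, criterion)   # hypothetical helper
        test_loss, test_accuracy = evaluate(model, criterion)       # hypothetical helper
        scheduler.step(test_loss)  # ReduceLROnPlateau cuts lr by factor=0.1 after 10 flat epochs
        if test_accuracy > best_test_accuracy:
            best_test_accuracy = test_accuracy
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
        if epochs_without_improvement >= patience:  # stop after 100 epochs with no gain
            break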
Binary file not shown.
main.py: 112 changes

@@ -4,14 +4,12 @@ from Qfunctions.divSet import divSet
 from Qfunctions.loaData import load_data as dLoader
 from sklearn.decomposition import PCA

 import torch

 def main():
-    projet_name = '20241005Sound'
-    label_names =["Accuracy", "Compress", "Distance", "Loss", "Metal", "Python"]
-    # data = dLoader(projet_name, label_names, isDir=False)
+    projet_name = '20241009MaterialDiv'
+    label_names =["Acrylic", "Ecoflex", "PDMS", "PLA", "Wood"]
     data = dLoader(projet_name, label_names, isDir=True)
     X_train, X_test, y_train, y_test, encoder = divSet(
-        data=data, labels=label_names, test_size= 0.2
+        data=data, labels=label_names, test_size= 0.3
     )

     print(y_train)
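divSet itself is not part of this diff; judging from its signature and the returned encoder, it presumably wraps train_test_split plus a LabelEncoder, roughly as below (the "label" column name and the random_state are assumptions for illustration):

    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import LabelEncoder

    def div_set_sketch(data, labels, test_size):
        # "label" column name is assumed; the real divSet layout may differ
        encoder = LabelEncoder().fit(labels)
        y = encoder.transform(data["label"])
        X = data.drop(columns=["label"]).values
        return (*train_test_split(X, y, test_size=test_size, random_state=0), encoder)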
@@ -27,20 +25,115 @@ def main():
     df_pca3d = pd.DataFrame(data=principalComponents, columns=['PC1', 'PC2', 'PC3'])
     df_pca3d['labels'] = y_train

-    # save as a CSV file
+    # save as an xlsx file
     import os
     folder = os.path.join("./Result", projet_name)
     if not os.path.exists(folder):
         os.makedirs(folder)
     df_pca2d.to_excel(os.path.join(folder, 'pca_2d_points_with_labels.xlsx'), index=False)
     df_pca3d.to_excel(os.path.join(folder, 'pca_3d_points_with_labels.xlsx'), index=False)

+    """ clf = SVC(kernel='rbf', C=1.0)
+    from sklearn.model_selection import RandomizedSearchCV
+    from scipy.stats import uniform
+
+    # define the parameter distribution
+    param_dist = {'C': uniform(0.1, 100)}
+
+    # randomized search
+    random_search = RandomizedSearchCV(SVC(kernel='rbf'), param_distributions=param_dist,
+                                       n_iter=20, cv=5, scoring='accuracy')
+    random_search.fit(X_train, y_train)
+
+    # get the best parameters and the best model
+    best_C = random_search.best_params_['C']
+    best_model = random_search.best_estimator_
+
+    # evaluate
+    y_pred = best_model.predict(X_test)
+    from sklearn.metrics import accuracy_score, confusion_matrix
+    accuracy = accuracy_score(y_test, y_pred)
+
+    print(f"Best C: {best_C}")
+    print(f"Best accuracy: {accuracy}")"""
+
+    """
+    from sklearn import svm
+    from sklearn.model_selection import train_test_split, learning_curve, cross_val_score
+    import numpy as np
+    import matplotlib.pyplot as plt
+    import seaborn as sns
+
+    # run cross-validation
+    cv_scores = cross_val_score(clf, X_train, y_train, cv=5)
+    print(f"Cross-validation scores: {cv_scores}")
+    print(f"Mean CV score: {cv_scores.mean():.3f} (+/- {cv_scores.std() * 2:.3f})")
+
+    # compute the learning curve
+    train_sizes, train_scores, test_scores = learning_curve(
+        clf, X_train, y_train, cv=5, n_jobs=-1,
+        train_sizes=np.linspace(.1, 1.0, 5))
+
+    train_scores_mean = np.mean(train_scores, axis=1)
+    train_scores_std = np.std(train_scores, axis=1)
+    test_scores_mean = np.mean(test_scores, axis=1)
+    test_scores_std = np.std(test_scores, axis=1)
+
+    # plot the learning curve
+    plt.figure(figsize=(10, 6))
+    plt.title("Learning Curve")
+    plt.xlabel("Training examples")
+    plt.ylabel("Score")
+    plt.grid()
+    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
+                     train_scores_mean + train_scores_std, alpha=0.1, color="r")
+    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
+                     test_scores_mean + test_scores_std, alpha=0.1, color="g")
+    plt.plot(train_sizes, train_scores_mean, 'o-', color="r", label="Training score")
+    plt.plot(train_sizes, test_scores_mean, 'o-', color="g", label="Cross-validation score")
+    plt.legend(loc="best")
+    plt.show()
+
+    # train the model on the full training set
+    clf.fit(X_train, y_train)
+
+    # predict
+    y_pred = clf.predict(X_test)
+
+    # compute the accuracy
+    accuracy = accuracy_score(y_test, y_pred)
+    print(f"Test Accuracy: {accuracy:.3f}")
+
+    # compute the normalized confusion matrix
+    cm = confusion_matrix(y_test, y_pred, normalize='true')
+
+    # plot the confusion matrix
+    plt.figure(figsize=(10,7))
+    sns.heatmap(cm, annot=True, fmt='.2f', cmap='Blues')
+    plt.title('Normalized Confusion Matrix')
+    plt.ylabel('True label')
+    plt.xlabel('Predicted label')
+    plt.show() """

+    """
+    model.fit(X_train, y_train)
+    y_pred = model.predict(X_test)
+    from sklearn.metrics import accuracy_score, confusion_matrix
+    accuracy = accuracy_score(y_test, y_pred)
+    print(f"Accuracy: {accuracy}")
+
+    # compute the normalized confusion matrix
+    cm = confusion_matrix(y_test, y_pred, normalize='true')
+    print(cm) """


     model = Qmlp(
         X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test,
-        hidden_layers=[64, 64],
+        hidden_layers=[128, 128],
         dropout_rate=0
     )
-    model.fit(100)
+    model.fit(300)

     cm = model.get_cm()
     epoch_data = model.get_epoch_data()
@@ -48,6 +141,7 @@ def main():
     from Qfunctions.saveToxlsx import save_to_xlsx as stx
     stx(project_name=projet_name, file_name="cm", data=cm )
+    stx(project_name=projet_name, file_name="acc_and_loss", data=epoch_data)

     print("Done")

 if __name__ == '__main__':
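save_to_xlsx is imported here but not shown in this diff; given how it is called, it is presumably a thin wrapper along these lines (the ./Result/<project> path mirrors the PCA exports above and is an assumption):

    import os
    import pandas as pd

    def save_to_xlsx_sketch(project_name, file_name, data):
        # assumed convention: one xlsx per result under ./Result/<project>
        folder = os.path.join("./Result", project_name)
        os.makedirs(folder, exist_ok=True)
        pd.DataFrame(data).to_excel(os.path.join(folder, file_name + ".xlsx"), index=False)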
test2.py: 55 changes (file deleted)

@@ -1,55 +0,0 @@
-import pandas as pd
-import torch
-from torch.utils.data import DataLoader, TensorDataset
-
-# read the Excel file
-df = pd.read_excel('loss-metal-compress.xlsx')
-
-# assume the model needs 300 rows of data (rows 300-600 of column 1 here)
-data = df.iloc[300:600, 1].values
-
-
-# convert the data to a Tensor
-data_tensor = torch.tensor(data, dtype=torch.float32).unsqueeze(0)  # add a batch dimension
-
-# number of zeros that need to be padded
-padding_size = 371 - data_tensor.size(1)
-
-# if the number of zeros to pad is greater than 0, pad
-if padding_size > 0:
-    # create a zero tensor of shape [1, padding_size]
-    padding_tensor = torch.zeros(1, padding_size, dtype=torch.float32)
-    # concatenate the original data with the zero tensor
-    data_tensor_padded = torch.cat((data_tensor, padding_tensor), dim=1)
-else:
-    data_tensor_padded = data_tensor
-
-# wrap in a TensorDataset and DataLoader
-dataset = TensorDataset(data_tensor_padded)
-dataloader = DataLoader(dataset, batch_size=1, shuffle=False)
-
-# choose the device
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-# device = 'cpu'
-print(f"Using device: {device}")
-
-# load your model
-model = torch.load('Sound.pth', map_location=device)  # make sure the model is loaded onto the correct device
-model.to(device)  # make sure once more that the model is on the correct device
-model.eval()  # switch to evaluation mode
-
-# run predictions
-predictions = []
-with torch.no_grad():
-    for batch in dataloader:
-        inputs = batch[0].to(device)  # move the input data to the same device
-        outputs = model(inputs)
-        _, predicted = torch.max(outputs, 1)
-        predictions.extend(predicted.cpu().numpy())  # move the predictions back to the CPU and convert to a numpy array
-
-# print the predictions
-print(predictions)
-
-
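Side note on the deleted padding logic: the manual zero-tensor concatenation can be written in one line with torch.nn.functional.pad, which is equivalent here (the 371 target width comes from the script above):

    import torch.nn.functional as F

    # right-pad the last dimension up to 371 columns (no-op if already long enough)
    data_tensor_padded = F.pad(data_tensor, (0, max(0, 371 - data_tensor.size(1))))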