This commit is contained in:
newbie 2024-10-07 09:54:32 +08:00
commit 884db9b926
56 changed files with 1345 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
Static
Result

15
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "main.py",
"console": "integratedTerminal"
}
]
}

0
Qfunctions/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

28
Qfunctions/divSet.py Normal file
View File

@@ -0,0 +1,28 @@
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

def divSet(data, labels=None, test_size=0.2, random_state=None):
    encoder = LabelEncoder()
    # The last column holds the labels
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]
    if labels is not None:
        labels = encoder.fit_transform(labels)
    else:
        encoder.fit(y)
    # Split the dataset into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
    # Standardize the features
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    # Encode the labels (LabelEncoder expects a 1-D array, so ravel() rather than reshape(-1, 1))
    y_train = encoder.transform(y_train.values.ravel())
    y_test = encoder.transform(y_test.values.ravel())
    return X_train, X_test, y_train, y_test, encoder

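A minimal usage sketch for divSet, assuming a DataFrame whose last column is the label; the toy frame below is hypothetical:

import pandas as pd
from Qfunctions.divSet import divSet

df = pd.DataFrame({
    'feature1': [0.1, 0.4, 0.3, 0.9],
    'feature2': [1.2, 0.7, 1.1, 0.2],
    'label':    ['cat', 'dog', 'cat', 'dog'],
})
X_train, X_test, y_train, y_test, encoder = divSet(df, test_size=0.25, random_state=42)
print(encoder.classes_)  # e.g. ['cat' 'dog']; y_train/y_test are integer-encoded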
97
Qfunctions/loaData.py Normal file
View File

@@ -0,0 +1,97 @@
import os
import pandas as pd

STATIC_PATH = './Static'

# Read every xlsx file in a folder; each file corresponds to one label.
# labelNames gives the label names; if not provided, the file names are used.
def load_data(folder, labelNames, isDir):
    # Validate the 'folder' argument
    if folder is None:
        raise ValueError("The 'folder' parameter is required.")
    # Validate the 'labelNames' argument
    if labelNames is None:
        raise ValueError("The 'labelNames' parameter is required if 'folder' does not contain labels.")
    folder = os.path.join(STATIC_PATH, folder)
    # Make sure the source data folder exists
    if not os.path.isdir(folder):
        raise ValueError(f"The folder '{folder}' does not exist.")
    # fileNames = [f for f in os.listdir(folder) if f.endswith('.xlsx')]
    # # Get the maximum row count across the files
    # max_row_length = get_max_row_len(folder, fileNames)
    # all_features = []
    # for i, fileName in enumerate(fileNames):
    #     features = load_xlsx(folder + '/' + fileName, labelNames[i], max_row_length, 'zero')
    #     all_features.append(features)
    # data = pd.concat(all_features, ignore_index=True)
    if not isDir:
        data = load_from_file(folder=folder, labelNames=labelNames)
    else:
        data = load_from_folder(folder=folder, labelNames=labelNames)
    print(data)
    return data

def load_from_folder(folder, labelNames):
    pass

def load_from_file(folder, labelNames):
    fileNames = [labelName + ".xlsx" for labelName in labelNames]
    # Get the maximum row count across the files
    max_row_length = get_max_row_len(folder, fileNames)
    all_features = []
    for i, fileName in enumerate(fileNames):
        features = load_xlsx(folder + '/' + fileName, labelNames[i], max_row_length, 'zero')
        all_features.append(features)
    return pd.concat(all_features, ignore_index=True)

def load_xlsx(fileName, labelName, max_row_length=1000, fill_rule=None):
    df = pd.read_excel(fileName)
    # Extract every second column, starting at column index 1
    features = df.iloc[0:, 1::2]
    features.dropna(inplace=True)
    features.reset_index(drop=True, inplace=True)
    features = features.T
    # Pad every row to the target length
    features = features.apply(lambda row: fill_to_len(row, max_row_length, fill_rule), axis=1)
    features['label'] = labelName
    features.columns = [f'feature{i+1}' for i in range(max_row_length)] + ['label']
    return features

def fill_to_len(row, length=1000, rule=None):
    fill_value = 0
    if rule == 'min':
        fill_value = row.min()
    elif rule == 'mean':
        fill_value = row.mean()
    elif rule == 'zero':
        fill_value = 0
    fill_values = pd.Series([fill_value] * (length - len(row)))
    return pd.concat([row, fill_values], ignore_index=True)

def get_max_row_len(folder, filenames):
    max_len = 0
    for filename in filenames:
        df = pd.read_excel(os.path.join(folder, filename))
        max_len = max(max_len, df.shape[0])
    return max_len

__all__ = ['load_data']

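A hedged usage sketch for load_data as defined above; the folder and label names are the ones main.py uses, and one Static/20241005Sound/&lt;label&gt;.xlsx per label is assumed to exist:

from Qfunctions.loaData import load_data

labels = ["Metal", "Python"]
data = load_data('20241005Sound', labels, isDir=False)
# -> DataFrame with columns feature1..featureN plus 'label', one row per recording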
13
Qfunctions/saveToxlsx.py Normal file
View File

@@ -0,0 +1,13 @@
import os

def save_to_xlsx(project_name, file_name, data):
    os.makedirs(f'Result/{project_name}', exist_ok=True)
    data.to_excel(f'Result/{project_name}/{file_name}.xlsx', index=True)
    print("Saved successfully to " + f'Result/{project_name}/{file_name}.xlsx')
    # for filename, data in save_maps.items():
    #     data.to_excel(f'Result/{project_name}/{filename}.xlsx', index=True)
    #     print("Saved successfully to " + f'Result/{project_name}/{filename}.xlsx')
    # print('Save to xlsx done!')

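A one-line usage sketch, assuming a pandas DataFrame df; the project and file names are illustrative:

from Qfunctions.saveToxlsx import save_to_xlsx
save_to_xlsx('20241005Sound', 'acc_and_loss', df)  # writes Result/20241005Sound/acc_and_loss.xlsx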
17
Qfunctions/test.py Normal file
View File

@@ -0,0 +1,17 @@
import time
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score

def MLP(X_train, X_test, y_train, y_test):
    start_time = time.time()
    # Train the MLP classifier
    mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=42)
    mlp.fit(X_train, y_train)
    y_pred = mlp.predict(X_test)
    end_time = time.time()
    # Print the training time
    print("Training Time:", end_time - start_time, "seconds")
    # Evaluate the model
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

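A hedged sketch chaining divSet into this scikit-learn baseline; data is assumed to come from load_data as above:

from Qfunctions.divSet import divSet
from Qfunctions.test import MLP

X_train, X_test, y_train, y_test, _ = divSet(data, test_size=0.2, random_state=42)
MLP(X_train, X_test, y_train, y_test)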
14
Qtorch/Models/QSVM.py Normal file
View File

@@ -0,0 +1,14 @@
from Qtorch.Models.Qnn import Qnn
from abc import ABC, abstractmethod

class QSVM(Qnn, ABC):
    def __init__(self, data, labels=None, test_size=0.2, random_state=None):
        super().__init__(data, labels, test_size, random_state)
        self.result.update({
            "pca_2d": None,
            "pca_3d": None
        })

    @abstractmethod
    def train_model(self, train_loader, test_loader, epochs):
        return super().train_model(train_loader, test_loader, epochs)

74
Qtorch/Models/QSVM_BRF.py Normal file
View File

@@ -0,0 +1,74 @@
import torch
import torch.nn as nn
import torch.optim as optim
from Qtorch.Models.QSVM import QSVM as svm

class QSVM_BRF(svm):
    def __init__(self, data, labels=None, test_size=0.2, random_state=None,
                 gamma=1.0, C=100, batch_size=64, learning_rate=0.01):
        super().__init__(data, labels, test_size, random_state)
        # shape[1] - 1: every column except the label is a feature
        self.gamma, self.C, self.n_features = gamma, C, data.shape[1] - 1
        self.support_vectors = torch.cat([batch[0] for batch in self.train_loader])
        self.alpha = nn.Parameter(torch.zeros(self.support_vectors.shape[0]))
        self.b = nn.Parameter(torch.zeros(1))
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.optimizer = optim.SGD(self.parameters(), lr=self.learning_rate)

    def rbf_kernel(self, X, Y):
        # K(x, y) = exp(-gamma * ||x - y||^2), with the squared distance expanded
        # as ||x||^2 + ||y||^2 - 2 x.y so all pairs come from one matrix product
        X_norm = (X**2).sum(1).view(-1, 1)
        Y_norm = (Y**2).sum(1).view(1, -1)
        dist = X_norm + Y_norm - 2.0 * torch.mm(X, Y.t())
        return torch.exp(-self.gamma * dist)

    def forward(self, X):
        K = self.rbf_kernel(X, self.support_vectors)
        return torch.mm(K, self.alpha.unsqueeze(1)).squeeze() + self.b

    def hinge_loss(self, outputs, targets):
        # Targets are expected in {-1, +1}
        return torch.mean(torch.clamp(1 - outputs * targets, min=0))

    def regularization(self):
        return 0.5 * (self.alpha ** 2).sum()

    def train_model(self, epoch_times=100, learning_rate=0.01):
        losses, train_accs, test_accs = [], [], []
        for epoch in range(epoch_times):
            self.train()
            epoch_loss, correct_train, total_train = 0, 0, 0
            for batch_X, batch_y in self.train_loader:
                self.optimizer.zero_grad()
                outputs = self(batch_X)
                loss = self.hinge_loss(outputs, batch_y) + self.C * self.regularization()
                loss.backward()
                self.optimizer.step()
                epoch_loss += loss.item()
                predicted = torch.sign(outputs)
                correct_train += (predicted == batch_y).sum().item()
                total_train += batch_y.size(0)
            train_acc = correct_train / total_train
            test_acc = self.evaluate()
            losses.append(epoch_loss / len(self.train_loader))
            train_accs.append(train_acc)
            test_accs.append(test_acc)
            print(f'Epoch [{epoch+1}/{epoch_times}], Loss: {losses[-1]:.4f}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

    def evaluate(self):
        self.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for batch_X, batch_y in self.test_loader:
                outputs = self(batch_X)
                predicted = torch.sign(outputs)
                correct += (predicted == batch_y).sum().item()
                total += batch_y.size(0)
        return correct / total

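A minimal training sketch for QSVM_BRF as defined above. Binary data with targets already in {-1, +1} is assumed; see the label-encoding note after the remake Qsvm_brf file further below. The label names and hyperparameters here are illustrative:

model = QSVM_BRF(data, labels=['neg', 'pos'], test_size=0.2, random_state=42,
                 gamma=0.5, C=1.0, learning_rate=0.01)
model.train_model(epoch_times=50)
print('final test accuracy:', model.evaluate())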
180
Qtorch/Models/Qmlp.py Normal file
View File

@@ -0,0 +1,180 @@
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
import pandas as pd

LABEL_ENCODER = LabelEncoder()
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class Qmlp(nn.Module):
    def __init__(self, X_train, y_train, X_test, y_test,
                 hidden_layers,
                 labels=None,
                 dropout_rate=0.3
                 ):
        super(Qmlp, self).__init__()
        # Per-instance history (a class-level dict would be shared across instances)
        self.epoch_data = {
            'epoch': [],
            'train_loss': [],
            'train_accuracy': [],
            'test_accuracy': []
        }
        self.X_train, self.y_train, self.X_test, self.y_test = X_train, y_train, X_test, y_test
        self.labels = labels
        input_size = X_train.shape[1]
        print(input_size)
        num_classes = len(set(y_train))
        self.layers = nn.ModuleList()
        # Input layer to first hidden layer
        self.layers.append(nn.Linear(input_size, hidden_layers[0]))
        self.layers.append(nn.BatchNorm1d(hidden_layers[0]))
        self.layers.append(nn.ReLU())
        self.layers.append(nn.Dropout(dropout_rate))
        # Create hidden layers
        for i in range(1, len(hidden_layers)):
            self.layers.append(nn.Linear(hidden_layers[i-1], hidden_layers[i]))
            self.layers.append(nn.BatchNorm1d(hidden_layers[i]))
            self.layers.append(nn.ReLU())
            self.layers.append(nn.Dropout(dropout_rate))
        # Output layer
        self.layers.append(nn.Linear(hidden_layers[-1], num_classes))
        self.__init_weights()

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

    def __prepare_data(self):
        # Step 2: Prepare the data
        X_train_tensor = torch.tensor(self.X_train, dtype=torch.float32)
        self.y_train = LABEL_ENCODER.fit_transform(self.y_train)
        y_train_tensor = torch.tensor(self.y_train, dtype=torch.long)
        X_test_tensor = torch.tensor(self.X_test, dtype=torch.float32)
        self.y_test = LABEL_ENCODER.transform(self.y_test)
        y_test_tensor = torch.tensor(self.y_test, dtype=torch.long)
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
        train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
        return train_loader, test_loader

    def __train_model(self, train_loader, test_loader, epochs_times=100):
        model = self.to(DEVICE)
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
        best_test_accuracy = 0
        patience = 100
        counter = 0
        accuracy_threshold = 0.99  # 99% accuracy threshold
        for epoch in range(epochs_times):
            model.train()
            running_loss = 0.0
            correct_train = 0
            total_train = 0
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total_train += labels.size(0)
                correct_train += (predicted == labels).sum().item()
            train_accuracy = correct_train / total_train
            train_loss = running_loss / len(train_loader)
            model.eval()
            correct_test = 0
            total_test = 0
            all_labels = []
            all_predicted = []
            all_prob = []
            with torch.no_grad():
                for inputs, labels in test_loader:
                    inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                    outputs = model(inputs)
                    prob = torch.nn.functional.softmax(outputs, dim=1)
                    _, predicted = torch.max(outputs.data, 1)
                    total_test += labels.size(0)
                    correct_test += (predicted == labels).sum().item()
                    all_labels.extend(labels.cpu().numpy())
                    all_predicted.extend(predicted.cpu().numpy())
                    all_prob.extend(prob.cpu().numpy())
            test_accuracy = correct_test / total_test
            print(f'Epoch [{epoch+1}/{epochs_times}], Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy * 100:.2f}%, Test Accuracy: {test_accuracy*100:.2f}%')
            self.epoch_data['epoch'].append(epoch+1)
            self.epoch_data['train_loss'].append(train_loss)
            self.epoch_data['train_accuracy'].append(train_accuracy)
            self.epoch_data['test_accuracy'].append(test_accuracy)
            scheduler.step(train_loss)
            if test_accuracy > best_test_accuracy:
                best_test_accuracy = test_accuracy
                counter = 0
            else:
                counter += 1
            if counter >= patience and best_test_accuracy >= accuracy_threshold:
                print(f"Early stopping at epoch {epoch+1}")
                break
        # Row-normalized confusion matrix from the last evaluation pass
        self.cm = confusion_matrix(all_labels, all_predicted, normalize='true')
        print(self.cm)

    def get_cm(self):
        return pd.DataFrame(self.cm, columns=self.labels, index=self.labels)

    def get_epoch_data(self):
        return pd.DataFrame(self.epoch_data)

    def fit(self, epoch_times=100):
        train_loader, test_loader = self.__prepare_data()
        self.__train_model(train_loader, test_loader, epochs_times=epoch_times)

    def __init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.zeros_(m.bias)

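A hedged end-to-end sketch of the Qmlp API defined above; data and label_names are assumed to come from load_data as in main.py:

from Qfunctions.divSet import divSet
from Qtorch.Models.Qmlp import Qmlp

X_train, X_test, y_train, y_test, _ = divSet(data, test_size=0.2, random_state=42)
model = Qmlp(X_train, y_train, X_test, y_test,
             hidden_layers=[32, 32, 32], labels=label_names, dropout_rate=0.3)
model.fit(epoch_times=100)
cm = model.get_cm()               # row-normalized confusion matrix as a DataFrame
history = model.get_epoch_data()  # per-epoch loss and accuracies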
105
Qtorch/Models/Qnn.py Normal file
View File

@@ -0,0 +1,105 @@
import torch
import torch.nn as nn
import pandas as pd
from abc import ABC, abstractmethod
from sklearn.metrics import confusion_matrix as cm
from torch.utils.data import DataLoader, TensorDataset
from Qfunctions.divSet import divSet as ds
from Qfunctions.saveToxlsx import save_to_xlsx as stx

class Qnn(nn.Module, ABC):
    def __init__(self, data, labels=None, test_size=0.2, random_state=None):
        super(Qnn, self).__init__()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # Keep the original labels for the confusion matrix
        self.original_labels = labels
        # Split into training and test sets (stored on self so that
        # confusion_matrix() can reach y_test later)
        self.X_train, self.X_test, self.y_train, self.y_test, self.labels = ds(
            data=data,
            labels=labels,
            test_size=test_size,
            random_state=random_state
        )
        self.train_loader, self.test_loader = self.__prepare_data(
            X_train=self.X_train,
            y_train=self.y_train,
            X_test=self.X_test,
            y_test=self.y_test
        )
        # Result containers
        self.result = {
            'acc_and_loss': {
                'epoch': [],
                'loss': [],
                'train_accuracy': [],
                'test_accuracy': [],
            },
            'confusion_matrix': None,
        }

    def accuracy(self, output, target):
        pass

    # Loss function (overridden by subclasses)
    def hinge_loss(self, output, target):
        pass

    @abstractmethod
    def train_model(self, train_loader, test_loader, epochs):
        pass

    def confusion_matrix(self, test_outputs):
        predicted = torch.argmax(test_outputs, dim=1)
        true_label = torch.argmax(self.y_test, dim=1)
        return cm(predicted.cpu(), true_label.cpu())

    def fit(self, epochs=100):
        self.train_model(epochs)

    def save(self, project_name):
        for filename, data in self.result.items():
            if filename == 'confusion_matrix':
                data = pd.DataFrame(data, columns=self.original_labels, index=self.original_labels)
            else:
                data = pd.DataFrame(data)
            stx(project_name, filename, data)

    def __prepare_data(self, X_train, y_train, X_test, y_test):
        X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
        y_train_tensor = torch.tensor(y_train, dtype=torch.long)
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
        y_test_tensor = torch.tensor(y_test, dtype=torch.long)
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
        train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
        print(train_loader, test_loader)
        return train_loader, test_loader

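Qnn is abstract; a minimal subclass sketch of the contract a concrete model has to satisfy (the linear head and the class name are illustrative, not part of this commit). Note that fit() forwards only epochs, so concrete overrides in this codebase in practice take a single epochs argument:

import torch.nn as nn
from Qtorch.Models.Qnn import Qnn

class LinearQnn(Qnn):  # hypothetical example subclass
    def __init__(self, data, labels=None, n_features=1000, n_classes=2):
        super().__init__(data, labels)
        self.fc = nn.Linear(n_features, n_classes)

    def forward(self, x):
        return self.fc(x)

    def train_model(self, epochs):
        # iterate self.train_loader, append per-epoch stats to
        # self.result['acc_and_loss'], set self.result['confusion_matrix'] at the end
        pass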
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

0
Qtorch/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

1
SVM Submodule

@@ -0,0 +1 @@
Subproject commit 3aec5f294e817679e61b0833d4f750b9f118cfbc

49
catplus Executable file
View File

@@ -0,0 +1,49 @@
#!/bin/bash

# Name of the generated Markdown file
output_file="python_files_output.md"

# Truncate or create the output file
> "$output_file"

# Recursively process a directory
process_directory() {
    local dir="$1"
    local depth="$2"
    # Emit the directory name as a heading
    echo "$(printf '%0.s#' $(seq 1 $depth)) ${dir##*/}" >> "$output_file"
    echo "" >> "$output_file"
    # Iterate over all .py files in the current directory
    for file in "$dir"/*.py; do
        # Check that the file exists (guards against directories with no .py files)
        if [ -f "$file" ]; then
            # Write the file name as a sub-heading
            echo "$(printf '%0.s#' $(seq 1 $((depth + 1)))) ${file##*/}" >> "$output_file"
            echo "" >> "$output_file"
            # Open the code fence
            echo '```python' >> "$output_file"
            # Append the Python file's contents
            cat "$file" >> "$output_file"
            # Close the code fence
            echo '```' >> "$output_file"
            echo "" >> "$output_file"
        fi
    done
    # Recurse into subdirectories
    for subdir in "$dir"/*/; do
        if [ -d "$subdir" ]; then
            process_directory "$subdir" $((depth + 1))
        fi
    done
}

# Start from the current directory
process_directory "." 1
echo "Markdown file generated: $output_file"

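Usage note (an assumption about intent, not part of the script itself): run ./catplus from the repository root, after chmod +x catplus if needed; it walks the tree depth-first, emits one heading per directory and one fenced python block per .py file, and leaves the result in python_files_output.md.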
189
main.py Normal file
View File

@@ -0,0 +1,189 @@
# from Qtorch.Functions import dLoader
from Qtorch.Models.Qmlp import Qmlp
from Qfunctions.divSet import divSet
from Qfunctions.loaData import load_data as dLoader
from sklearn.decomposition import PCA
import torch

def main():
    project_name = '20241005Sound'
    label_names = ["Accuracy", "Compress", "Distance", "Loss", "Metal", "Python"]
    data = dLoader(project_name, label_names, isDir=False)
    X_train, X_test, y_train, y_test, encoder = divSet(
        data=data, labels=label_names, test_size=0.5
    )
    print(y_train)
    import pandas as pd
    pca = PCA(n_components=2)  # keep two principal components
    principalComponents = pca.fit_transform(X_train)
    df_pca2d = pd.DataFrame(data=principalComponents, columns=['PC1', 'PC2'])
    df_pca2d['labels'] = y_train
    pca = PCA(n_components=3)  # keep three principal components
    principalComponents = pca.fit_transform(X_train)
    df_pca3d = pd.DataFrame(data=principalComponents, columns=['PC1', 'PC2', 'PC3'])
    df_pca3d['labels'] = y_train
    # Save the PCA coordinates as Excel files
    import os
    folder = os.path.join("./Result", project_name)
    os.makedirs(folder, exist_ok=True)  # make sure the output folder exists before writing
    df_pca2d.to_excel(os.path.join(folder, 'pca_2d_points_with_labels.xlsx'), index=False)
    df_pca3d.to_excel(os.path.join(folder, 'pca_3d_points_with_labels.xlsx'), index=False)
    # model = Qmlp(
    #     X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test,
    #     hidden_layers=[32, 32, 32],
    #     dropout_rate=0
    # )
    # model.fit(100)
    # cm = model.get_cm()
    # epoch_data = model.get_epoch_data()
    # from Qfunctions.saveToxlsx import save_to_xlsx as stx
    # stx(project_name=project_name, file_name="cm", data=cm)
    # stx(project_name=project_name, file_name="acc_and_loss", data=epoch_data)
    # print("Done")

if __name__ == '__main__':
    main()
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# from sklearn.svm import SVC
# from sklearn.model_selection import GridSearchCV
# from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# import pandas as pd

# if __name__ == '__main__':
#     project_name = '20240829Letters'
#     labels = None
#     data = ld(project_name, labels)
#     svm = SVM(
#         data=data,
#         labels=labels
#     )
#     svm.fit()
#     X, y = data.iloc[:, :-1], data.iloc[:, -1]
#     # Split the data
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=None)
#     # Standardize the data
#     scaler = StandardScaler()
#     X_train_scaled = scaler.fit_transform(X_train)
#     X_test_scaled = scaler.transform(X_test)
#     # Create the SVM classifier
#     svm = SVC(kernel='rbf', random_state=42)
#     # Define the parameter grid
#     param_grid = {
#         'C': [0.1, 1, 10, 100],
#         'gamma': ['scale', 'auto', 0.1, 1, 10]
#     }
#     # Tune hyperparameters with grid search
#     grid_search = GridSearchCV(svm, param_grid, cv=5, n_jobs=-1, verbose=2)
#     grid_search.fit(X_train_scaled, y_train)
#     # Print the best parameters
#     print("Best parameters:", grid_search.best_params_)
#     # Use the model with the best parameters
#     best_svm = grid_search.best_estimator_
#     # Compute training and test accuracy
#     y_train_pred = best_svm.predict(X_train_scaled)
#     train_acc = accuracy_score(y_train, y_train_pred)
#     y_test_pred = best_svm.predict(X_test_scaled)
#     test_acc = accuracy_score(y_test, y_test_pred)
#     # Predict on the test set
#     y_pred = best_svm.predict(X_test_scaled)
#     # Compute the accuracy
#     accuracy = accuracy_score(y_test, y_pred)
#     print(f"Accuracy: {accuracy}")
#     # Print the detailed classification report
#     print(classification_report(y_test, y_pred))
#     # Compute and display the confusion matrix
#     cm = confusion_matrix(y_test, y_test_pred, normalize='true')
#     print(cm)
#     # model = QSVM(
#     #     data=data,
#     #     labels=labels
#     # )
#     # model.fit(300)
#     # model.save(project_name)
#     # Create an Excel writer
#     # Get the classification report and convert it to a DataFrame
#     report = classification_report(y_test, y_test_pred, output_dict=True)
#     df_report = pd.DataFrame(report).transpose()
#     with pd.ExcelWriter(f'./Result/{project_name}/svm_results.xlsx') as writer:
#         from sklearn.decomposition import PCA
#         pca = PCA()
#         X_pca = pca.fit_transform(X)
#         # Build a DataFrame of the 2D PCA coordinates
#         df_pca_2d = pd.DataFrame(data=X_pca[:, :2], columns=['First Principal Component', 'Second Principal Component'])
#         df_pca_2d['Label'] = y
#         # Build a DataFrame of the 3D PCA coordinates
#         df_pca_3d = pd.DataFrame(data=X_pca[:, :3], columns=['First Principal Component', 'Second Principal Component', 'Third Principal Component'])
#         df_pca_3d['Label'] = y
#         # Write the 2D and 3D PCA coordinates to Excel
#         df_pca_2d.to_excel(writer, sheet_name='PCA 2D Coordinates', index=False)
#         df_pca_3d.to_excel(writer, sheet_name='PCA 3D Coordinates', index=False)
#         # Write the classification report to Excel
#         df_report.to_excel(writer, sheet_name='Classification Report')
#         # Write the best parameters to Excel
#         pd.DataFrame([grid_search.best_params_]).to_excel(writer, sheet_name='Best Parameters')
#         # To also save the confusion matrix:
#         from sklearn.metrics import confusion_matrix
#         # Build the labelled confusion matrix
#         cm = confusion_matrix(y_test, y_test_pred, normalize='true')
#         df_cm = pd.DataFrame(cm, index=labels, columns=labels)
#         df_cm.index.name = 'True'
#         df_cm.columns.name = 'Predicted'
#         # Write the confusion matrix to Excel
#         df_cm.to_excel(writer, sheet_name='Confusion Matrix')
#         # To also save training and test accuracy:
#         train_accuracy = best_svm.score(X_train_scaled, y_train)
#         test_accuracy = best_svm.score(X_test_scaled, y_test)
#         pd.DataFrame({
#             'Train Accuracy': [train_accuracy],
#             'Test Accuracy': [test_accuracy]
#         }).to_excel(writer, sheet_name='Accuracy')
#     print("Results have been saved to 'svm_results.xlsx'")

15
remake/.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "main.py",
"console": "integratedTerminal"
}
]
}

5
remake/Qtorch/Functions/__init__.py Normal file
View File

@@ -0,0 +1,5 @@
from .dataLoader import dLoader
from .dataSplitter import dsplit
from .resSaver import save_to_xlsx
__all__ = ['dLoader', 'dsplit', 'save_to_xlsx']

91
remake/Qtorch/Functions/dataLoader.py Normal file
View File

@@ -0,0 +1,91 @@
import os
import pandas as pd

STATIC_PATH = './Static'

def dLoader(folder, label_names=None):
    """
    Load data from Excel files in a specified folder.

    Args:
        folder (str): Name of the folder containing Excel files.
        label_names (list): Optional list of label names. If not provided, file names will be used.

    Returns:
        pandas.DataFrame: Loaded and processed data.
    """
    folder_path = os.path.join(STATIC_PATH, folder)
    file_names = [f for f in os.listdir(folder_path) if f.endswith('.xlsx')]
    if not label_names:
        label_names = [f.split('.')[0] for f in file_names]
    max_row_length = get_max_row_len(folder_path, file_names)
    all_features = []
    for i, file_name in enumerate(file_names):
        features = load_xlsx(os.path.join(folder_path, file_name), label_names[i], max_row_length)
        all_features.append(features)
    return pd.concat(all_features, ignore_index=True)

def load_xlsx(file_name, label_name, max_row_length, fill_rule='mean'):
    """
    Load and process data from a single Excel file.

    Args:
        file_name (str): Path to the Excel file.
        label_name (str): Label for the data in this file.
        max_row_length (int): Maximum number of rows to consider.
        fill_rule (str): Rule for filling missing values ('min', 'mean', or None).

    Returns:
        pandas.DataFrame: Processed data from the Excel file.
    """
    df = pd.read_excel(file_name)
    features = df.iloc[0:, 1::2]
    features.dropna(inplace=True)
    features.reset_index(drop=True, inplace=True)
    features = features.T
    features = features.apply(lambda row: fill_to_len(row, max_row_length, fill_rule), axis=1)
    features['label'] = label_name
    features.columns = [f'feature{i+1}' for i in range(max_row_length)] + ['label']
    return features

def fill_to_len(row, length=1000, rule=None):
    """
    Fill a row to a specified length.

    Args:
        row (pandas.Series): Row to fill.
        length (int): Desired length of the row.
        rule (str): Rule for filling ('min', 'mean', or None).

    Returns:
        pandas.Series: Filled row.
    """
    fill_value = 0
    if rule == 'min':
        fill_value = row.min()
    elif rule == 'mean':
        fill_value = row.mean()
    fill_values = pd.Series([fill_value] * (length - len(row)))
    return pd.concat([row, fill_values], ignore_index=True)

def get_max_row_len(folder, filenames):
    """
    Get the maximum row length across all Excel files in a folder.

    Args:
        folder (str): Path to the folder containing Excel files.
        filenames (list): List of Excel file names.

    Returns:
        int: Maximum row length.
    """
    return max(pd.read_excel(os.path.join(folder, filename)).shape[0] for filename in filenames)

37
remake/Qtorch/Functions/dataSplitter.py Normal file
View File

@@ -0,0 +1,37 @@
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

def dsplit(data, labels=None, test_size=0.2, random_state=None):
    """
    Split the dataset into training and testing sets.

    Args:
        data (pandas.DataFrame): Input data.
        labels (list): Optional list of labels.
        test_size (float): Proportion of the dataset to include in the test split.
        random_state (int): Random state for reproducibility.

    Returns:
        tuple: X_train, X_test, y_train, y_test, encoded_labels
    """
    encoder = LabelEncoder()
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]
    if labels is not None:
        encoded_labels = encoder.fit_transform(labels)
    else:
        encoder.fit(y)
        encoded_labels = None
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    y_train = encoder.transform(y_train.values.ravel())
    y_test = encoder.transform(y_test.values.ravel())
    return X_train, X_test, y_train, y_test, encoded_labels

15
remake/Qtorch/Functions/resSaver.py Normal file
View File

@@ -0,0 +1,15 @@
import os

def save_to_xlsx(project_name, file_name, data):
    """
    Save data to an Excel file.

    Args:
        project_name (str): Name of the project (used for folder name).
        file_name (str): Name of the file to save.
        data (pandas.DataFrame): Data to save.
    """
    os.makedirs(f'Result/{project_name}', exist_ok=True)
    file_path = f'Result/{project_name}/{file_name}.xlsx'
    data.to_excel(file_path, index=True)
    print(f"Data saved successfully to {file_path}")

View File

Binary file not shown.

21
remake/Qtorch/Models/QSVM.py Normal file
View File

@@ -0,0 +1,21 @@
from .Qnn import Qnn

class QSVM(Qnn):
    def __init__(self, data, labels=None, test_size=0.2, random_state=None):
        super(QSVM, self).__init__(data, labels, test_size, random_state)
        self.result.update({
            "pca_2d": None,
            "pca_3d": None
        })

    def forward(self, x):
        # Implement SVM forward pass
        pass

    def train_model(self, epochs):
        # Implement SVM training logic
        pass

    def hinge_loss(self, output, target):
        # Implement hinge loss
        pass

72
remake/Qtorch/Models/Qnn.py Normal file
View File

@@ -0,0 +1,72 @@
import torch
import torch.nn as nn
import pandas as pd
from abc import ABC, abstractmethod
from Qtorch.Functions import dsplit
from Qtorch.Functions import save_to_xlsx as stx
# from sklearn.metrics import confusion_matrix

class Qnn(nn.Module, ABC):
    def __init__(self, data, labels=None, test_size=0.2, random_state=None):
        super(Qnn, self).__init__()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.original_labels = labels
        # Split data
        self.X_train, self.X_test, self.y_train, self.y_test, self.labels = dsplit(
            data=data,
            labels=labels,
            test_size=test_size,
            random_state=random_state
        )
        self.train_loader, self.test_loader = self._prepare_data()
        self.result = {
            'acc_and_loss': {
                'epoch': [],
                'loss': [],
                'train_accuracy': [],
                'test_accuracy': [],
            },
            'confusion_matrix': None,
        }

    @abstractmethod
    def forward(self, x):
        pass

    @abstractmethod
    def train_model(self, epochs):
        pass

    def fit(self, epochs=100):
        self.train_model(epochs)

    def save(self, project_name):
        for filename, data in self.result.items():
            if filename == 'confusion_matrix':
                data = pd.DataFrame(data, columns=self.original_labels, index=self.original_labels)
            else:
                data = pd.DataFrame(data)
            stx(project_name, filename, data)

    def _prepare_data(self):
        X_train_tensor = torch.tensor(self.X_train, dtype=torch.float32)
        y_train_tensor = torch.tensor(self.y_train, dtype=torch.long)
        X_test_tensor = torch.tensor(self.X_test, dtype=torch.float32)
        y_test_tensor = torch.tensor(self.y_test, dtype=torch.long)
        train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
        test_dataset = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
        test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)
        return train_loader, test_loader

    # def confusion_matrix(self, test_outputs):
    #     predicted = torch.argmax(test_outputs, dim=1)
    #     true_label = torch.argmax(self.y_test, dim=1)
    #     return confusion_matrix(predicted.cpu(), true_label.cpu())

114
remake/Qtorch/Models/Qsvm_brf.py Normal file
View File

@@ -0,0 +1,114 @@
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from .QSVM import QSVM
from sklearn.metrics import confusion_matrix

class Qsvm_brf(QSVM):
    def __init__(self, data, labels=None, test_size=0.2, random_state=None,
                 gamma=1.0, C=100, batch_size=64, learning_rate=0.01):
        super(Qsvm_brf, self).__init__(data, labels, test_size, random_state)
        self.to(self.device)
        self.gamma = gamma
        self.C = C
        self.n_features = data.shape[1] - 1
        self.support_vectors = torch.cat([batch[0] for batch in self.train_loader]).to(self.device)
        # Create the parameters directly on the target device: wrapping
        # nn.Parameter(...) in .to(device) returns a plain tensor and
        # silently unregisters the parameter
        self.alpha = nn.Parameter(torch.zeros(self.support_vectors.shape[0], device=self.device))
        self.b = nn.Parameter(torch.zeros(1, device=self.device))
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        print(self.b, self.alpha)
        print(list(self.parameters()))

    def train_model(self, epochs):
        self.to(self.device)
        self.optimizer = optim.SGD(self.parameters(), lr=self.learning_rate)
        for epoch in range(epochs):
            self.train()
            total_loss = 0
            correct = 0
            total = 0
            progress_bar = tqdm(self.train_loader, desc=f'Epoch {epoch+1}/{epochs}')
            for batch_X, batch_y in progress_bar:
                batch_X, batch_y = batch_X.to(self.device), batch_y.to(self.device)
                self.optimizer.zero_grad()
                outputs = self(batch_X)
                loss = self.hinge_loss(outputs, batch_y) + self.C * self.regularization()
                loss.backward()
                self.optimizer.step()
                total_loss += loss.item()
                predicted = torch.sign(outputs)
                correct += (predicted == batch_y).sum().item()
                total += batch_y.size(0)
                progress_bar.set_postfix({
                    'Loss': total_loss / (progress_bar.n + 1),
                    'Acc': 100. * correct / total
                })
            train_accuracy = correct / total
            test_accuracy = self.evaluate()
            self.result['acc_and_loss']['epoch'].append(epoch + 1)
            self.result['acc_and_loss']['loss'].append(total_loss / len(self.train_loader))
            self.result['acc_and_loss']['train_accuracy'].append(train_accuracy)
            self.result['acc_and_loss']['test_accuracy'].append(test_accuracy)
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {total_loss/len(self.train_loader):.4f}, '
                  f'Train Acc: {train_accuracy:.4f}, Test Acc: {test_accuracy:.4f}')
        # Compute the final confusion matrix
        self.result['confusion_matrix'] = self.compute_confusion_matrix()

    def compute_confusion_matrix(self):
        self.eval()
        all_predictions = []
        all_labels = []
        with torch.no_grad():
            for batch_X, batch_y in self.test_loader:
                batch_X, batch_y = batch_X.to(self.device), batch_y.to(self.device)
                outputs = self(batch_X)
                predicted = torch.sign(outputs)
                all_predictions.extend(predicted.cpu().numpy())
                all_labels.extend(batch_y.cpu().numpy())
        return confusion_matrix(all_labels, all_predictions)

    def evaluate(self):
        self.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for batch_X, batch_y in self.test_loader:
                batch_X, batch_y = batch_X.to(self.device), batch_y.to(self.device)
                outputs = self(batch_X)
                predicted = torch.sign(outputs)
                correct += (predicted == batch_y).sum().item()
                total += batch_y.size(0)
        return correct / total

    def rbf_kernel(self, X, Y):
        X_norm = (X**2).sum(1).view(-1, 1)
        Y_norm = (Y**2).sum(1).view(1, -1)
        dist = X_norm + Y_norm - 2.0 * torch.mm(X, Y.t())
        return torch.exp(-self.gamma * dist)

    def forward(self, X):
        X = X.to(self.device)
        K = self.rbf_kernel(X, self.support_vectors)
        return torch.mm(K, self.alpha.unsqueeze(1)).squeeze() + self.b

    def hinge_loss(self, outputs, targets):
        return torch.mean(torch.clamp(1 - outputs * targets, min=0))

    def regularization(self):
        return 0.5 * (self.alpha ** 2).sum()

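A caveat and a hedged usage sketch: hinge_loss and the torch.sign decision rule expect binary targets in {-1, +1}, while dsplit's LabelEncoder emits {0, 1}. The remap below is an assumption layered on top of this commit, not part of it, and the label names are illustrative:

model = Qsvm_brf(data, labels=['neg', 'pos'], gamma=0.5, C=1.0)
# Hypothetical fix-up: shift the encoded 0/1 targets in both loaders to -1/+1
for loader in (model.train_loader, model.test_loader):
    X_t, y_t = loader.dataset.tensors
    loader.dataset.tensors = (X_t, y_t * 2 - 1)
model.fit(epochs=50)
model.save('20241005Sound')  # writes acc_and_loss and confusion_matrix via save_to_xlsx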
3
remake/Qtorch/Models/__init__.py Normal file
View File

@@ -0,0 +1,3 @@
from .Qsvm_brf import Qsvm_brf
__all__ = ['Qsvm_brf']

Binary file not shown.

Binary file not shown.

0
remake/README.md Normal file
View File

23
remake/main.py Normal file
View File

@@ -0,0 +1,23 @@
from Qtorch.Functions import dLoader
from Qtorch.Models.Qmlp import Qmlp
from Qfunctions.divSet import divSet

def main():
    project_name = '20240821Sound'
    label_names = None
    data = dLoader(project_name, label_names)
    # The original line was left unfinished; completing it with the divSet call
    # that matches these five return values is an assumption
    X_train, X_test, y_train, y_test, encoded_labels = divSet(data, labels=label_names)

if __name__ == '__main__':
    main()

110
test.py Normal file
View File

@@ -0,0 +1,110 @@
from graphviz import Digraph
import os

class layer:
    def __init__(self, graph, name, size, color):
        self.name = name
        self.size = size
        self.color = color
        self.graph = graph

    def draw(self):
        pass

class input_layer(layer):
    def __init__(self, graph, size, color='darkorange:orange'):
        # Pass a color through to the base class (the original call omitted it)
        super().__init__(graph, f"Input Layer({size})", size, color)
        self.graph.node(self.name, shape='circle', style='filled', fillcolor=self.color, label=" ")
        self.graph.attr(label=f'{self.name} Layer({self.size})', fontname='Times New Roman', fontweight='bold', fontsize='36')

def draw_neural_net(input_size, hidden_sizes, num_classes, show_hidden=3):
    g = Digraph('G', filename='neural_network', format='png')
    g.attr(rankdir='LR', size='10,8', nodesep='1', ranksep='2', bgcolor='transparent', dpi='300')
    # Input layer
    with g.subgraph(name='cluster_input') as c:
        c.attr(color='white')
        for i in range(input_size):
            c.node(f'input_{i}', shape='circle', style='filled', fillcolor='darkorange:orange', label=" ")
        c.attr(label=f'Input Layer({input_size})', fontname='Times New Roman', fontweight='bold', fontsize='36')
    # Hidden layers
    previous_layer = 'input'
    previous_layer_size = input_size
    for layer_idx, hidden_size in enumerate(hidden_sizes):
        with g.subgraph(name=f'cluster_hidden_{layer_idx}') as c:
            c.attr(color='white')
            for i in range(show_hidden):
                c.node(f'hidden_{layer_idx}_{i}', shape='circle', style='filled', fillcolor='darkgreen:lightgreen', label=" ")
            if hidden_size > show_hidden * 2:
                c.node(f'ellipsis_{layer_idx}', shape='plaintext', label='...')
            for i in range(hidden_size - show_hidden, hidden_size):
                c.node(f'hidden_{layer_idx}_{i}', shape='circle', style='filled', fillcolor='darkgreen:lightgreen', label=" ")
            c.attr(label=f'Hidden Layer {layer_idx + 1}({hidden_size})', fontname='Times New Roman', fontweight='bold', fontsize='36')
        # Add edges from previous layer to current hidden layer
        if layer_idx == 0:  # Only connect input layer to first hidden layer
            for i in range(previous_layer_size):
                for j in range(show_hidden):
                    g.edge(f'{previous_layer}_{i}', f'hidden_{layer_idx}_{j}')
                for j in range(hidden_size - show_hidden, hidden_size):
                    g.edge(f'{previous_layer}_{i}', f'hidden_{layer_idx}_{j}')
        else:
            # Connect the visible nodes (first and last show_hidden) of the previous
            # hidden layer; the original loops indexed them with the current layer's
            # size, which only worked when all hidden layers were the same size
            visible_prev = list(range(show_hidden)) + list(range(previous_layer_size - show_hidden, previous_layer_size))
            visible_curr = list(range(show_hidden)) + list(range(hidden_size - show_hidden, hidden_size))
            for i in visible_prev:
                for j in visible_curr:
                    g.edge(f'hidden_{layer_idx - 1}_{i}', f'hidden_{layer_idx}_{j}')
        previous_layer = f'hidden_{layer_idx}'
        previous_layer_size = hidden_size
    # Output layer
    with g.subgraph(name='cluster_output') as c:
        c.attr(color='white')
        for i in range(num_classes):
            c.node(f'output_{i}', shape='circle', style='filled', fillcolor='darkorange:orange', label=" ")
        c.attr(label=f'Output Layer({num_classes})', fontname='Times New Roman', fontweight='bold', fontsize='36')
    # Add edges from last hidden layer to output layer
    # for i in range(previous_layer_size):
    #     for j in range(num_classes):
    #         g.edge(f'{previous_layer}_{i}', f'output_{j}')
    # # Add edges from input to visible hidden nodes
    # for i in range(input_size):
    #     for j in range(show_hidden):
    #         g.edge(f'input_{i}', f'hidden_{j}')
    # for i in range(input_size):
    #     for j in range(hidden_size - show_hidden, hidden_size):
    #         g.edge(f'input_{i}', f'hidden_{j}')
    # # Add edges from visible hidden nodes to output layer
    # for i in range(show_hidden):
    #     for j in range(num_classes):
    #         g.edge(f'hidden_{i}', f'output_{j}')
    # for i in range(hidden_size - show_hidden, hidden_size):
    #     for j in range(num_classes):
    #         g.edge(f'hidden_{i}', f'output_{j}')
    # Add edges from last hidden layer to output layer
    for i in range(show_hidden):
        for j in range(num_classes):
            g.edge(f'{previous_layer}_{i}', f'output_{j}')
    for i in range(previous_layer_size - show_hidden, previous_layer_size):
        for j in range(num_classes):
            g.edge(f'{previous_layer}_{i}', f'output_{j}')
    return g

if __name__ == '__main__':
    g = draw_neural_net(7, [60, 60], 7)
    output_path = g.render(view=False)
    print(output_path)
    os.system(f'explorer.exe neural_network.png')

55
test2.py Normal file
View File

@@ -0,0 +1,55 @@
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset

# Read the Excel file
df = pd.read_excel('loss-metal-compress.xlsx')

# The model's input is assumed to be rows 300-599 of the second column
data = df.iloc[300:600, 1].values

# Convert the data to a tensor and add a batch dimension
data_tensor = torch.tensor(data, dtype=torch.float32).unsqueeze(0)

# Number of zeros needed to pad the input to length 371
padding_size = 371 - data_tensor.size(1)

# Pad with zeros if needed
if padding_size > 0:
    # Build a zero tensor of shape [1, padding_size]
    padding_tensor = torch.zeros(1, padding_size, dtype=torch.float32)
    # Concatenate the original data with the zero tensor
    data_tensor_padded = torch.cat((data_tensor, padding_tensor), dim=1)
else:
    data_tensor_padded = data_tensor

# Wrap in a TensorDataset and DataLoader
dataset = TensorDataset(data_tensor_padded)
dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

# Pick the device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = 'cpu'
print(f"Using device: {device}")

# Load the model; map_location makes sure it lands on the chosen device
model = torch.load('Sound.pth', map_location=device)
model.to(device)  # make doubly sure the model is on the right device
model.eval()  # switch to evaluation mode

# Run the predictions
predictions = []
with torch.no_grad():
    for batch in dataloader:
        inputs = batch[0].to(device)  # move the inputs to the same device
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())  # move results back to CPU as numpy

# Print the predictions
print(predictions)