update divSet
This commit is contained in:
parent
03fe15fcbb
commit
2c9169273f
|
|
@ -1,6 +1,8 @@
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
from torch.utils.data import DataLoader, TensorDataset
|
||||
|
||||
from Qtorch.Models.Qnn import Qnn
|
||||
|
||||
class QCNN(Qnn):
|
||||
|
|
@ -58,3 +60,22 @@ class QCNN(Qnn):
|
|||
nn.init.xavier_uniform_(m.weight)
|
||||
if m.bias is not None:
|
||||
m.bias.data.fill_(0.01)
|
||||
|
||||
def _prepare_data(self):
|
||||
|
||||
# 将data转换为tensor形式, unsqueeze可以创建多一维度的,给
|
||||
X_train_tensor = torch.tensor(self.X_train, dtype=torch.float32).unsqueeze(1)
|
||||
self.y_train = self.LABEL_ENCODER.fit_transform(self.y_train)
|
||||
y_train_tensor = torch.tensor(self.y_train, dtype=torch.long)
|
||||
|
||||
X_test_tensor = torch.tensor(self.X_test, dtype=torch.float32).unsqueeze(1)
|
||||
self.y_test = self.LABEL_ENCODER.transform(self.y_test)
|
||||
y_test_tensor = torch.tensor(self.y_test, dtype=torch.long)
|
||||
|
||||
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
|
||||
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
|
||||
|
||||
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
|
||||
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
|
||||
|
||||
return train_loader, test_loader
|
||||
|
|
|
|||
|
|
@ -1,41 +1,37 @@
|
|||
import numpy as np
|
||||
import torch.nn as nn
|
||||
from Qtorch.Models.Qnn import Qnn
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
|
||||
|
||||
class Qmlp(Qnn):
|
||||
|
||||
def __init__(self, X_train, y_train, X_test, y_test,
|
||||
def __init__(self, data,
|
||||
hidden_layers,
|
||||
labels=None,
|
||||
dropout_rate=0.3
|
||||
dropout_rate=0.3,
|
||||
test_size = 0.2,
|
||||
random_state=None
|
||||
):
|
||||
super(Qmlp, self).__init__()
|
||||
super(Qmlp, self).__init__(data=data, labels=labels, test_size=test_size, random_state=random_state)
|
||||
|
||||
self.LABEL_ENCODER = LabelEncoder()
|
||||
|
||||
self.X_train, self.y_train, self.X_test, self.y_test = X_train, y_train, X_test, y_test
|
||||
|
||||
self.labels = labels
|
||||
|
||||
input_size = X_train.shape[1]
|
||||
num_classes = len(set(y_train))
|
||||
input_size = self.X_train.shape[1]
|
||||
num_classes = len(np.unique(self.y_train))
|
||||
self.layers = nn.ModuleList()
|
||||
|
||||
# Input layer to first hidden layer
|
||||
# 连接输入层和第一个隐藏层
|
||||
self.layers.append(nn.Linear(input_size, hidden_layers[0]))
|
||||
self.layers.append(nn.BatchNorm1d(hidden_layers[0]))
|
||||
self.layers.append(nn.ReLU())
|
||||
self.layers.append(nn.Dropout(dropout_rate))
|
||||
|
||||
# Create hidden layers
|
||||
# 创建隐藏层
|
||||
for i in range(1, len(hidden_layers)):
|
||||
self.layers.append(nn.Linear(hidden_layers[i-1], hidden_layers[i]))
|
||||
self.layers.append(nn.BatchNorm1d(hidden_layers[i]))
|
||||
self.layers.append(nn.ReLU())
|
||||
self.layers.append(nn.Dropout(dropout_rate))
|
||||
|
||||
# Output layer
|
||||
# 创建输出层
|
||||
self.layers.append(nn.Linear(hidden_layers[-1], num_classes))
|
||||
self.__init_weights()
|
||||
|
||||
|
|
|
|||
|
|
@ -5,24 +5,27 @@ from sklearn.decomposition import PCA
|
|||
from sklearn.metrics import confusion_matrix
|
||||
from torch.utils.data import DataLoader, TensorDataset
|
||||
|
||||
# from Qfunctions.divSet import divSet as ds
|
||||
from Qtorch import divSet as DS
|
||||
# from Qfunctions.saveToxlsx import save_to_xlsx as stx
|
||||
|
||||
|
||||
class Qnn(nn.Module):
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, data, labels, test_size = 0.2, random_state=None):
|
||||
|
||||
super(Qnn, self).__init__()
|
||||
|
||||
# 使用gpu进行加速, 没有gpu的话使用CPU
|
||||
self.DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
|
||||
self.X_train, self.y_train, self.X_test, self.y_test = None, None, None, None
|
||||
# 划分测试集和训练集
|
||||
self.X_train, self.X_test, self.y_train, self.y_test, self.LABEL_ENCODER = DS(
|
||||
data=data, labels=labels, test_size=test_size, random_state=random_state
|
||||
)
|
||||
|
||||
self.labels = None
|
||||
|
||||
self.LABEL_ENCODER = None
|
||||
self.labels = labels
|
||||
|
||||
# 存储过程数据的文件
|
||||
self.epoch_data = {
|
||||
'epoch': [],
|
||||
'train_loss': [],
|
||||
|
|
@ -30,17 +33,18 @@ class Qnn(nn.Module):
|
|||
'test_accuracy': []
|
||||
}
|
||||
|
||||
# PCA 图片数据存储
|
||||
self.pca_2d, self.pca_3d = None, None
|
||||
|
||||
|
||||
def __prepare_data(self):
|
||||
def _prepare_data(self):
|
||||
|
||||
# 将data转换为tensor形式
|
||||
X_train_tensor = torch.tensor(self.X_train, dtype=torch.float32).unsqueeze(1)
|
||||
X_train_tensor = torch.tensor(self.X_train, dtype=torch.float32)
|
||||
self.y_train = self.LABEL_ENCODER.fit_transform(self.y_train)
|
||||
y_train_tensor = torch.tensor(self.y_train, dtype=torch.long)
|
||||
|
||||
X_test_tensor = torch.tensor(self.X_test, dtype=torch.float32).unsqueeze(1)
|
||||
X_test_tensor = torch.tensor(self.X_test, dtype=torch.float32)
|
||||
self.y_test = self.LABEL_ENCODER.transform(self.y_test)
|
||||
y_test_tensor = torch.tensor(self.y_test, dtype=torch.long)
|
||||
|
||||
|
|
@ -52,7 +56,7 @@ class Qnn(nn.Module):
|
|||
|
||||
return train_loader, test_loader
|
||||
|
||||
def __train_model(self, train_loader, test_loader, epochs_times=100):
|
||||
def _train_model(self, train_loader, test_loader, epochs_times=100):
|
||||
|
||||
model = self.to(self.DEVICE)
|
||||
|
||||
|
|
@ -62,7 +66,9 @@ class Qnn(nn.Module):
|
|||
best_test_accuracy = 0
|
||||
patience = 100
|
||||
counter = 0
|
||||
accuracy_threshold = 0.99 # 99% 的准确率阈值
|
||||
|
||||
# 99% 的准确率阈值
|
||||
accuracy_threshold = 0.99
|
||||
|
||||
for epoch in range(epochs_times):
|
||||
|
||||
|
|
@ -130,10 +136,11 @@ class Qnn(nn.Module):
|
|||
return
|
||||
|
||||
def fit(self, epoch_times = 100):
|
||||
train_loader, test_loader = self.__prepare_data()
|
||||
self.__train_model(train_loader, test_loader, epochs_times=epoch_times)
|
||||
train_loader, test_loader = self._prepare_data()
|
||||
self._train_model(train_loader, test_loader, epochs_times=epoch_times)
|
||||
return
|
||||
|
||||
# 外部获取PCA图像数据的接口
|
||||
def get_PCA(self):
|
||||
|
||||
# PCA 2D 图像
|
||||
|
|
@ -143,16 +150,18 @@ class Qnn(nn.Module):
|
|||
df_pca2d['labels'] = self.y_train
|
||||
|
||||
# PCA 3D 图像
|
||||
pca_3d = PCA(n_components=3)
|
||||
pca_3d = PCA(n_components=3) # 保留三个主成分
|
||||
principalComponents = pca_3d.fit_transform(self.X_train)
|
||||
df_pca3d = pd.DataFrame(data=principalComponents, columns=['PC1', 'PC2', 'PC3'])
|
||||
df_pca3d['labels'] = self.y_train
|
||||
|
||||
return df_pca2d, df_pca3d
|
||||
|
||||
# 外部获取混淆矩阵的接口
|
||||
def get_cm(self):
|
||||
return pd.DataFrame(self.cm, columns=self.labels, index=self.labels)
|
||||
|
||||
# 外部获取迭代数据的接口
|
||||
def get_epoch_data(self):
|
||||
return pd.DataFrame(self.epoch_data)
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,3 @@
|
|||
# Qtorch/__init__.py
|
||||
from .Functions.divSet import divSet
|
||||
from .Models import Qnn, Qmlp, Qcnn
|
||||
33
README.md
33
README.md
|
|
@ -1,4 +1,18 @@
|
|||
# README
|
||||
|
||||
## Preliminary
|
||||
1. 对于每一个类,将数据如下处理, 保存成xlsx或者xls文件
|
||||
|
||||
| | | | |
|
||||
|-------|-------|-------|-------|
|
||||
| arbitrary value | value | arbitrary value | value |
|
||||
| arbitrary value | value | arbitrary value | value |
|
||||
|
||||
即偶数列为一次循环的数据,奇数列为任意值即可
|
||||
|
||||
2. 配置conda环境
|
||||
> pass
|
||||
|
||||
|
||||
## Quickly Start
|
||||
1. 将项目文件夹命名为**日期+项目名**
|
||||
2. 编辑好label名称,label名称请使用英文或者数字命名
|
||||
|
|
@ -17,9 +31,6 @@
|
|||
label_names = ['Acrlic', 'Ecoflex', 'PDMS', 'PLA', 'Wood']
|
||||
# 使用库 divSet 划分训练集和测试集
|
||||
data = load_data(projet_name, label_names, isDir=False, fileClass='xlsx')
|
||||
X_train, X_test, y_train, y_test, encoder = divSet(
|
||||
data=data, labels=label_names, test_size= 0.3
|
||||
)
|
||||
```
|
||||
|
||||
5. 创建神经网络类
|
||||
|
|
@ -45,4 +56,16 @@
|
|||
save_to_xlsx(project_name=projet_name, file_name="cm", data=cm )
|
||||
save_to_xlsx(project_name=projet_name, file_name="acc_and_loss", data=epoch_data)
|
||||
```
|
||||
|
||||
## Advanced
|
||||
### loadData 处理数据工具的使用
|
||||
||参数类型|默认值|参数作用|
|
||||
|---|---|---|---|
|
||||
|folder|str|必填项|指定数据存放在Static下的哪个文件夹|
|
||||
|lableNames| list| 必填项| 指定每一个类的label名称, 既可以用来读取相应的文件,也可以用来给label排序|
|
||||
|isDir| bool| True| 若是上文Quickly Start章节2.1情况需要改成False,2.2情况则是True|
|
||||
|fileClass| str| 'xlsx'| 数据文件的后缀|
|
||||
> tips: 数据读取是按照以下情况读取的(2.1和2.2是Quickly Start章节的2.1和2.2简称):
|
||||
> 2.1情况的第一类数据读取的地址是 ./Static/folder/labelsNames[0].xlsx, 其他类同理
|
||||
> 2.2情况的第一类数据读取的地址是 ./Static/folder/labelsNames[0]/*.xlsx, 其他类同理
|
||||
### Qmlp 模型使用
|
||||
> pass
|
||||
|
|
|
|||
26
main.py
26
main.py
|
|
@ -1,30 +1,28 @@
|
|||
from Qtorch.Models.Qmlp import Qmlp
|
||||
from Qtorch.Models.Qcnn import QCNN
|
||||
from Qfunctions.divSet import divSet
|
||||
from Qfunctions.loaData import load_data
|
||||
from Qfunctions.saveToxlsx import save_to_xlsx as save_to_xlsx
|
||||
import string
|
||||
|
||||
def main():
|
||||
projet_name = '20241130 EMG-write' # 输入元数据文件夹名称
|
||||
label_names = list(string.ascii_uppercase) # 请在[]内输入每一个分类的名称
|
||||
projet_name = '20240821Sound' # 输入元数据文件夹名称
|
||||
label_names = ['flim', 'nano', 'pressure', 'sensor', 'water'] # 请在[]内输入每一个分类的名称
|
||||
|
||||
print(label_names)
|
||||
data = load_data(projet_name, label_names, isDir=False, fileClass='xlsx')
|
||||
X_train, X_test, y_train, y_test, encoder = divSet(
|
||||
data=data, labels=label_names, test_size= 0.3
|
||||
)
|
||||
|
||||
# model = Qmlp(
|
||||
# X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test,
|
||||
# hidden_layers = [128],
|
||||
# dropout_rate=0
|
||||
# )
|
||||
|
||||
model = QCNN(
|
||||
X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test,
|
||||
model = Qmlp(
|
||||
data=data,
|
||||
labels=label_names,
|
||||
hidden_layers = [128],
|
||||
dropout_rate=0
|
||||
)
|
||||
|
||||
# model = QCNN(
|
||||
# X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test,
|
||||
# dropout_rate=0
|
||||
# )
|
||||
|
||||
pca_2d, pca_3d = model.get_PCA()
|
||||
|
||||
model.fit(300)
|
||||
|
|
|
|||
Loading…
Reference in New Issue