diff --git a/Qtorch/Functions/__init__.py b/Qtorch/Functions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Qfunctions/divSet.py b/Qtorch/Functions/divSet.py similarity index 100% rename from Qfunctions/divSet.py rename to Qtorch/Functions/divSet.py diff --git a/Qtorch/Models/Qcnn.py b/Qtorch/Models/Qcnn.py index 8aeefd1..8483fff 100644 --- a/Qtorch/Models/Qcnn.py +++ b/Qtorch/Models/Qcnn.py @@ -1,6 +1,8 @@ import torch import torch.nn as nn from sklearn.preprocessing import LabelEncoder +from torch.utils.data import DataLoader, TensorDataset + from Qtorch.Models.Qnn import Qnn class QCNN(Qnn): @@ -58,3 +60,22 @@ class QCNN(Qnn): nn.init.xavier_uniform_(m.weight) if m.bias is not None: m.bias.data.fill_(0.01) + + def _prepare_data(self): + + # 将data转换为tensor形式, unsqueeze可以创建多一维度的,给 + X_train_tensor = torch.tensor(self.X_train, dtype=torch.float32).unsqueeze(1) + self.y_train = self.LABEL_ENCODER.fit_transform(self.y_train) + y_train_tensor = torch.tensor(self.y_train, dtype=torch.long) + + X_test_tensor = torch.tensor(self.X_test, dtype=torch.float32).unsqueeze(1) + self.y_test = self.LABEL_ENCODER.transform(self.y_test) + y_test_tensor = torch.tensor(self.y_test, dtype=torch.long) + + train_dataset = TensorDataset(X_train_tensor, y_train_tensor) + test_dataset = TensorDataset(X_test_tensor, y_test_tensor) + + train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True) + test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False) + + return train_loader, test_loader diff --git a/Qtorch/Models/Qmlp.py b/Qtorch/Models/Qmlp.py index 72b42a9..4317e0b 100644 --- a/Qtorch/Models/Qmlp.py +++ b/Qtorch/Models/Qmlp.py @@ -1,41 +1,37 @@ +import numpy as np import torch.nn as nn from Qtorch.Models.Qnn import Qnn -from sklearn.preprocessing import LabelEncoder class Qmlp(Qnn): - def __init__(self, X_train, y_train, X_test, y_test, + def __init__(self, data, hidden_layers, labels=None, - dropout_rate=0.3 + dropout_rate=0.3, + test_size = 
0.2, + random_state=None ): - super(Qmlp, self).__init__() + super(Qmlp, self).__init__(data=data, labels=labels, test_size=test_size, random_state=random_state) - self.LABEL_ENCODER = LabelEncoder() - - self.X_train, self.y_train, self.X_test, self.y_test = X_train, y_train, X_test, y_test - - self.labels = labels - - input_size = X_train.shape[1] - num_classes = len(set(y_train)) + input_size = self.X_train.shape[1] + num_classes = len(np.unique(self.y_train)) self.layers = nn.ModuleList() - # Input layer to first hidden layer + # 连接输入层和第一个隐藏层 self.layers.append(nn.Linear(input_size, hidden_layers[0])) self.layers.append(nn.BatchNorm1d(hidden_layers[0])) self.layers.append(nn.ReLU()) self.layers.append(nn.Dropout(dropout_rate)) - # Create hidden layers + # 创建隐藏层 for i in range(1, len(hidden_layers)): self.layers.append(nn.Linear(hidden_layers[i-1], hidden_layers[i])) self.layers.append(nn.BatchNorm1d(hidden_layers[i])) self.layers.append(nn.ReLU()) self.layers.append(nn.Dropout(dropout_rate)) - # Output layer + # 创建输出层 self.layers.append(nn.Linear(hidden_layers[-1], num_classes)) self.__init_weights() diff --git a/Qtorch/Models/Qnn.py b/Qtorch/Models/Qnn.py index ca646be..6f9b897 100644 --- a/Qtorch/Models/Qnn.py +++ b/Qtorch/Models/Qnn.py @@ -5,24 +5,27 @@ from sklearn.decomposition import PCA from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score from torch.utils.data import DataLoader, TensorDataset -# from Qfunctions.divSet import divSet as ds +from Qtorch import divSet as DS # from Qfunctions.saveToxlsx import save_to_xlsx as stx class Qnn(nn.Module): - def __init__(self): + def __init__(self, data, labels, test_size = 0.2, random_state=None): super(Qnn, self).__init__() + # 使用gpu进行加速, 没有gpu的话使用CPU self.DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.X_train, self.y_train, self.X_test, self.y_test = None, None, None, None + # 划分测试集和训练集 + self.X_train, self.X_test, self.y_train, self.y_test, 
self.LABEL_ENCODER = DS( + data=data, labels=labels, test_size=test_size, random_state=random_state + ) - self.labels = None - - self.LABEL_ENCODER = None + self.labels = labels + # 存储过程数据的文件 self.epoch_data = { 'epoch': [], 'train_loss': [], @@ -33,11 +36,12 @@ class Qnn(nn.Module): 'f1_score': [] } + # PCA 图片数据存储 self.pca_2d, self.pca_3d = None, None self.cm, self.cmn = None, None - def __prepare_data(self): + def _prepare_data(self): # 将data转换为tensor形式 X_train_tensor = torch.tensor(self.X_train, dtype=torch.float32) @@ -56,7 +60,7 @@ class Qnn(nn.Module): return train_loader, test_loader - def __train_model(self, train_loader, test_loader, epochs_times=100): + def _train_model(self, train_loader, test_loader, epochs_times=100): model = self.to(self.DEVICE) @@ -66,7 +70,9 @@ class Qnn(nn.Module): best_test_accuracy = 0 patience = 100 counter = 0 - accuracy_threshold = 0.99 # 99% 的准确率阈值 + + # 99% 的准确率阈值 + accuracy_threshold = 0.99 for epoch in range(epochs_times): @@ -149,11 +155,12 @@ class Qnn(nn.Module): return def fit(self, epoch_times = 100): - train_loader, test_loader = self.__prepare_data() - self.__train_model(train_loader, test_loader, epochs_times=epoch_times) + train_loader, test_loader = self._prepare_data() + self._train_model(train_loader, test_loader, epochs_times=epoch_times) return - def get_PCA(self): + # 外部获取PCA图像数据的接口 + def get_PCA(self): # PCA 2D 图像 pca_2d = PCA(n_components=2) # 保留两个主成分 @@ -162,19 +169,21 @@ class Qnn(nn.Module): df_pca2d['labels'] = self.y_train # PCA 3D 图像 - pca_3d = PCA(n_components=3) + pca_3d = PCA(n_components=3) # 保留三个主成分 principalComponents = pca_3d.fit_transform(self.X_train) df_pca3d = pd.DataFrame(data=principalComponents, columns=['PC1', 'PC2', 'PC3']) df_pca3d['labels'] = self.y_train return df_pca2d, df_pca3d + # 外部获取混淆矩阵的接口 def get_cm(self): return pd.DataFrame(self.cm, columns=self.labels, index=self.labels) def get_cmn(self): return pd.DataFrame(self.cmn, columns=self.labels, index=self.labels) + # 
外部获取迭代数据的接口 def get_epoch_data(self): return pd.DataFrame(self.epoch_data) diff --git a/Qtorch/__init__.py b/Qtorch/__init__.py index e69de29..f75a808 100644 --- a/Qtorch/__init__.py +++ b/Qtorch/__init__.py @@ -0,0 +1,3 @@ +# Qtorch/__init__.py +from .Functions.divSet import divSet +from .Models import Qnn, Qmlp, Qcnn \ No newline at end of file diff --git a/README.md b/README.md index f1a6ca8..2c30edc 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,18 @@ -#README + +## Preliminary +1. 对于每一个类,将数据如下处理, 保存成xlsx或者xls文件 + +| | | | | +|-------|-------|-------|-------| +| arbitrary value | value | arbitrary value | value | +| arbitrary value | value | arbitrary value | value | + +即偶数列为一次循环的数据,奇数列为任意值即可 + +2. 配置conda环境 +> pass + + ## Quickly Start 1. 将项目文件夹编辑成**日期+项目名** 2. 编辑好label名称,label名称命名变成英文或者数字 @@ -17,9 +31,6 @@ label_names = ['Acrlic', 'Ecoflex', 'PDMS', 'PLA', 'Wood'] # 使用库 divSet 划分训练集和数据集 data = load_data(projet_name, label_names, isDir=False, fileClass='xlsx') - X_train, X_test, y_train, y_test, encoder = divSet( - data=data, labels=label_names, test_size= 0.3 - ) ``` 5. 
创建神经网络类 @@ -45,4 +56,16 @@ save_to_xlsx(project_name=projet_name, file_name="cm", data=cm ) save_to_xlsx(project_name=projet_name, file_name="acc_and_loss", data=epoch_data) ``` - +## Advanced +### loadData 处理数据工具的使用 +||参数类型|默认值|参数作用| +|---|---|---|---| +|folder|str|必填项|指定数据存放在Static下的哪个文件夹| +|lableNames| list| 必填项| 指定每一个类的label名称, 既可以用来读取相应的文件,也可以用来给label排序| +|isDir| bool| True| 若是上文Quickly Start章节2.1情况需要改成False,2.2情况则是True| +|fileClass| str| 'xlsx'| 数据文件的后缀| +> tips: 数据读取是按照以下情况读取的(2.1和2.2是Quickly Start章节的2.1和2.2简称): +> 2.1情况的第一类数据读取的地址是 ./Static/folder/labelsNames[0].xlsx, 其他类同理 +> 2.2情况的第一类数据读取的地址是 ./Static/folder/labelsNames[0]/*.xlsx, 其他同理 +### Qmlp 模型使用 +> pass diff --git a/main.py b/main.py index 92a1c81..03943ce 100644 --- a/main.py +++ b/main.py @@ -20,7 +20,6 @@ def main(): hidden_layers = [128, 256, 128], dropout_rate=0 ) - # model = QCNN( # X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test, # dropout_rate=0