From 6e99f6caa83df76b49d39b1c2e881273d9a92367 Mon Sep 17 00:00:00 2001 From: newbie Date: Thu, 28 Nov 2024 13:40:20 +0800 Subject: [PATCH] remove unnecessary code --- Qfunctions/loaData.py | 29 ++++++++--------------------- main.py | 8 +++----- 2 files changed, 11 insertions(+), 26 deletions(-) diff --git a/Qfunctions/loaData.py b/Qfunctions/loaData.py index 397354c..0024c1c 100644 --- a/Qfunctions/loaData.py +++ b/Qfunctions/loaData.py @@ -5,7 +5,7 @@ STATIC_PATH = './Static' # 从文件夹中读取所有xlsx文件,每个文件对应一个label # labelNames为label的名字,如果不提供则默认为文件名 -def load_data(folder, labelNames, isDir): +def load_data(folder, labelNames, isDir=True, fileClass='xlsx'): # 检查folder参数 if folder is None: raise ValueError("The 'folder' parameter is required.") @@ -20,34 +20,21 @@ def load_data(folder, labelNames, isDir): if not os.path.isdir(folder): raise ValueError(f"The folder '{folder}' does not exist.") -# fileNames = [f for f in os.listdir(folder) if f.endswith('.xlsx')] - -# # 获取数据的最大行数 -# max_row_length = get_max_row_len(folder, fileNames) - -# all_features = [] - -# for i, fileName in enumerate(fileNames): - -# features = load_xlsx(folder + '/' + fileName, labelNames[i], max_row_length, 'zero') -# all_features.append(features) - -# data = pd.concat(all_features, ignore_index = True) - data = None if not isDir: - data = load_from_file(folder=folder, labelNames=labelNames) + data = load_from_file(folder=folder, labelNames=labelNames, fileClass=fileClass) else: - data = load_from_folder(folder=folder, labelNames=labelNames) + data = load_from_folder(folder=folder, labelNames=labelNames, fileClass=fileClass) print(data) return data -def load_from_folder(folder, labelNames): +def load_from_folder(folder, labelNames, fileClass): all_features = [] + fileClass = '.' + fileClass for labelName in labelNames: subfolder = os.path.join(folder, labelName) if os.path.exists(subfolder) and os.path.isdir(subfolder): - fileNames = [f for f in os.listdir(subfolder) if f.endswith('.xlsx')] + fileNames = [f for f in os.listdir(subfolder) if f.endswith(fileClass)] max_row_length = get_max_row_len(subfolder, fileNames) features = [] for fileName in fileNames: @@ -58,8 +45,8 @@ def load_from_folder(folder, labelNames): # 将所有标签的数据合并 return pd.concat(all_features, ignore_index=True) -def load_from_file(folder, labelNames): - fileNames = [labelName + ".xlsx" for labelName in labelNames] +def load_from_file(folder, labelNames, fileClass): + fileNames = [labelName + "." + fileClass for labelName in labelNames] # 获取数据的最大行数 max_row_length = get_max_row_len(folder, fileNames) all_features = [] diff --git a/main.py b/main.py index 6d62518..ee34bc6 100644 --- a/main.py +++ b/main.py @@ -5,15 +5,13 @@ from Qfunctions.loaData import load_data as dLoader from sklearn.decomposition import PCA def main(): - projet_name = '20241009MaterialDiv' - label_names =["Acrylic", "Ecoflex", "PDMS", "PLA", "Wood"] - data = dLoader(projet_name, label_names, isDir=True) + projet_name = '###########' # 输入元数据文件夹名称 + label_names =[] # 请在[]内输入每一个分类的名称 + data = dLoader(projet_name, label_names, isDir=False) X_train, X_test, y_train, y_test, encoder = divSet( data=data, labels=label_names, test_size= 0.3 ) - print(y_train) - import pandas as pd pca = PCA(n_components=2) # 保留两个主成分 principalComponents = pca.fit_transform(X_train)