remove unnecessary code

This commit is contained in:
newbie 2024-11-28 13:40:20 +08:00
parent f54ce32b7a
commit 6e99f6caa8
2 changed files with 11 additions and 26 deletions

View File

@ -5,7 +5,7 @@ STATIC_PATH = './Static'
# Read all xlsx files from the folder; each file corresponds to one label.
# labelNames holds the label names; if not provided, the file names are used by default.
def load_data(folder, labelNames, isDir):
def load_data(folder, labelNames, isDir=True, fileClass='xlsx'):
# 检查folder参数
if folder is None:
raise ValueError("The 'folder' parameter is required.")
@ -20,34 +20,21 @@ def load_data(folder, labelNames, isDir):
if not os.path.isdir(folder):
raise ValueError(f"The folder '{folder}' does not exist.")
# fileNames = [f for f in os.listdir(folder) if f.endswith('.xlsx')]
# # 获取数据的最大行数
# max_row_length = get_max_row_len(folder, fileNames)
# all_features = []
# for i, fileName in enumerate(fileNames):
# features = load_xlsx(folder + '/' + fileName, labelNames[i], max_row_length, 'zero')
# all_features.append(features)
# data = pd.concat(all_features, ignore_index = True)
data = None
if not isDir:
data = load_from_file(folder=folder, labelNames=labelNames)
data = load_from_file(folder=folder, labelNames=labelNames, fileClass=fileClass)
else:
data = load_from_folder(folder=folder, labelNames=labelNames)
data = load_from_folder(folder=folder, labelNames=labelNames, fileClass=fileClass)
print(data)
return data
def load_from_folder(folder, labelNames):
def load_from_folder(folder, labelNames, fileClass):
all_features = []
fileClass = '.' + fileClass
for labelName in labelNames:
subfolder = os.path.join(folder, labelName)
if os.path.exists(subfolder) and os.path.isdir(subfolder):
fileNames = [f for f in os.listdir(subfolder) if f.endswith('.xlsx')]
fileNames = [f for f in os.listdir(subfolder) if f.endswith(fileClass)]
max_row_length = get_max_row_len(subfolder, fileNames)
features = []
for fileName in fileNames:
@ -58,8 +45,8 @@ def load_from_folder(folder, labelNames):
# 将所有标签的数据合并
return pd.concat(all_features, ignore_index=True)
def load_from_file(folder, labelNames):
fileNames = [labelName + ".xlsx" for labelName in labelNames]
def load_from_file(folder, labelNames, fileClass):
fileNames = [labelName + "." + fileClass for labelName in labelNames]
# 获取数据的最大行数
max_row_length = get_max_row_len(folder, fileNames)
all_features = []

View File

@ -5,15 +5,13 @@ from Qfunctions.loaData import load_data as dLoader
from sklearn.decomposition import PCA
def main():
projet_name = '20241009MaterialDiv'
label_names =["Acrylic", "Ecoflex", "PDMS", "PLA", "Wood"]
data = dLoader(projet_name, label_names, isDir=True)
projet_name = '###########' # 输入元数据文件夹名称
label_names =[] # 请在[]内输入每一个分类的名称
data = dLoader(projet_name, label_names, isDir=False)
X_train, X_test, y_train, y_test, encoder = divSet(
data=data, labels=label_names, test_size= 0.3
)
print(y_train)
import pandas as pd
pca = PCA(n_components=2) # 保留两个主成分
principalComponents = pca.fit_transform(X_train)