remove unnecessary code
This commit is contained in:
parent
f54ce32b7a
commit
6e99f6caa8
|
|
@ -5,7 +5,7 @@ STATIC_PATH = './Static'
|
|||
|
||||
# Read all xlsx files from the folder; each file corresponds to one label
|
||||
# labelNames holds the label names; if not provided, the file names are used by default
|
||||
def load_data(folder, labelNames, isDir):
|
||||
def load_data(folder, labelNames, isDir=True, fileClass='xlsx'):
|
||||
# Validate the folder parameter
|
||||
if folder is None:
|
||||
raise ValueError("The 'folder' parameter is required.")
|
||||
|
|
@ -20,34 +20,21 @@ def load_data(folder, labelNames, isDir):
|
|||
if not os.path.isdir(folder):
|
||||
raise ValueError(f"The folder '{folder}' does not exist.")
|
||||
|
||||
# fileNames = [f for f in os.listdir(folder) if f.endswith('.xlsx')]
|
||||
|
||||
# # 获取数据的最大行数
|
||||
# max_row_length = get_max_row_len(folder, fileNames)
|
||||
|
||||
# all_features = []
|
||||
|
||||
# for i, fileName in enumerate(fileNames):
|
||||
|
||||
# features = load_xlsx(folder + '/' + fileName, labelNames[i], max_row_length, 'zero')
|
||||
# all_features.append(features)
|
||||
|
||||
# data = pd.concat(all_features, ignore_index = True)
|
||||
|
||||
data = None
|
||||
if not isDir:
|
||||
data = load_from_file(folder=folder, labelNames=labelNames)
|
||||
data = load_from_file(folder=folder, labelNames=labelNames, fileClass=fileClass)
|
||||
else:
|
||||
data = load_from_folder(folder=folder, labelNames=labelNames)
|
||||
data = load_from_folder(folder=folder, labelNames=labelNames, fileClass=fileClass)
|
||||
print(data)
|
||||
return data
|
||||
|
||||
def load_from_folder(folder, labelNames):
|
||||
def load_from_folder(folder, labelNames, fileClass):
|
||||
all_features = []
|
||||
fileClass = '.' + fileClass
|
||||
for labelName in labelNames:
|
||||
subfolder = os.path.join(folder, labelName)
|
||||
if os.path.exists(subfolder) and os.path.isdir(subfolder):
|
||||
fileNames = [f for f in os.listdir(subfolder) if f.endswith('.xlsx')]
|
||||
fileNames = [f for f in os.listdir(subfolder) if f.endswith(fileClass)]
|
||||
max_row_length = get_max_row_len(subfolder, fileNames)
|
||||
features = []
|
||||
for fileName in fileNames:
|
||||
|
|
@ -58,8 +45,8 @@ def load_from_folder(folder, labelNames):
|
|||
# Merge the data of all labels
|
||||
return pd.concat(all_features, ignore_index=True)
|
||||
|
||||
def load_from_file(folder, labelNames):
|
||||
fileNames = [labelName + ".xlsx" for labelName in labelNames]
|
||||
def load_from_file(folder, labelNames, fileClass):
|
||||
fileNames = [labelName + "." + fileClass for labelName in labelNames]
|
||||
# Get the maximum number of rows in the data
|
||||
max_row_length = get_max_row_len(folder, fileNames)
|
||||
all_features = []
|
||||
|
|
|
|||
8
main.py
8
main.py
|
|
@ -5,15 +5,13 @@ from Qfunctions.loaData import load_data as dLoader
|
|||
from sklearn.decomposition import PCA
|
||||
|
||||
def main():
|
||||
projet_name = '20241009MaterialDiv'
|
||||
label_names =["Acrylic", "Ecoflex", "PDMS", "PLA", "Wood"]
|
||||
data = dLoader(projet_name, label_names, isDir=True)
|
||||
projet_name = '###########' # 输入元数据文件夹名称
|
||||
label_names =[] # 请在[]内输入每一个分类的名称
|
||||
data = dLoader(projet_name, label_names, isDir=False)
|
||||
X_train, X_test, y_train, y_test, encoder = divSet(
|
||||
data=data, labels=label_names, test_size= 0.3
|
||||
)
|
||||
|
||||
print(y_train)
|
||||
|
||||
import pandas as pd
|
||||
pca = PCA(n_components=2) # 保留两个主成分
|
||||
principalComponents = pca.fit_transform(X_train)
|
||||
|
|
|
|||
Loading…
Reference in New Issue