remove unnecessary code
This commit is contained in:
parent
f54ce32b7a
commit
6e99f6caa8
|
|
@ -5,7 +5,7 @@ STATIC_PATH = './Static'
|
||||||
|
|
||||||
# 从文件夹中读取所有xlsx文件,每个文件对应一个label
|
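# Load every data file with extension `fileClass` (default 'xlsx') found under `folder`.
# With isDir=False each file "<label>.<fileClass>" is one label; with isDir=True each subfolder named after a label is one label.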
|
||||||
# labelNames为label的名字,如果不提供则默认为文件名
|
# labelNames为label的名字,如果不提供则默认为文件名
|
||||||
def load_data(folder, labelNames, isDir):
|
def load_data(folder, labelNames, isDir=True, fileClass='xlsx'):
|
||||||
# 检查folder参数
|
# 检查folder参数
|
||||||
if folder is None:
|
if folder is None:
|
||||||
raise ValueError("The 'folder' parameter is required.")
|
raise ValueError("The 'folder' parameter is required.")
|
||||||
|
|
@ -20,34 +20,21 @@ def load_data(folder, labelNames, isDir):
|
||||||
if not os.path.isdir(folder):
|
if not os.path.isdir(folder):
|
||||||
raise ValueError(f"The folder '{folder}' does not exist.")
|
raise ValueError(f"The folder '{folder}' does not exist.")
|
||||||
|
|
||||||
# fileNames = [f for f in os.listdir(folder) if f.endswith('.xlsx')]
|
|
||||||
|
|
||||||
# # 获取数据的最大行数
|
|
||||||
# max_row_length = get_max_row_len(folder, fileNames)
|
|
||||||
|
|
||||||
# all_features = []
|
|
||||||
|
|
||||||
# for i, fileName in enumerate(fileNames):
|
|
||||||
|
|
||||||
# features = load_xlsx(folder + '/' + fileName, labelNames[i], max_row_length, 'zero')
|
|
||||||
# all_features.append(features)
|
|
||||||
|
|
||||||
# data = pd.concat(all_features, ignore_index = True)
|
|
||||||
|
|
||||||
data = None
|
data = None
|
||||||
if not isDir:
|
if not isDir:
|
||||||
data = load_from_file(folder=folder, labelNames=labelNames)
|
data = load_from_file(folder=folder, labelNames=labelNames, fileClass=fileClass)
|
||||||
else:
|
else:
|
||||||
data = load_from_folder(folder=folder, labelNames=labelNames)
|
data = load_from_folder(folder=folder, labelNames=labelNames, fileClass=fileClass)
|
||||||
print(data)
|
print(data)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def load_from_folder(folder, labelNames):
|
def load_from_folder(folder, labelNames, fileClass):
|
||||||
all_features = []
|
all_features = []
|
||||||
|
fileClass = '.' + fileClass
|
||||||
for labelName in labelNames:
|
for labelName in labelNames:
|
||||||
subfolder = os.path.join(folder, labelName)
|
subfolder = os.path.join(folder, labelName)
|
||||||
if os.path.exists(subfolder) and os.path.isdir(subfolder):
|
if os.path.exists(subfolder) and os.path.isdir(subfolder):
|
||||||
fileNames = [f for f in os.listdir(subfolder) if f.endswith('.xlsx')]
|
fileNames = [f for f in os.listdir(subfolder) if f.endswith(fileClass)]
|
||||||
max_row_length = get_max_row_len(subfolder, fileNames)
|
max_row_length = get_max_row_len(subfolder, fileNames)
|
||||||
features = []
|
features = []
|
||||||
for fileName in fileNames:
|
for fileName in fileNames:
|
||||||
|
|
@ -58,8 +45,8 @@ def load_from_folder(folder, labelNames):
|
||||||
# 将所有标签的数据合并
|
# 将所有标签的数据合并
|
||||||
return pd.concat(all_features, ignore_index=True)
|
return pd.concat(all_features, ignore_index=True)
|
||||||
|
|
||||||
def load_from_file(folder, labelNames):
|
def load_from_file(folder, labelNames, fileClass):
|
||||||
fileNames = [labelName + ".xlsx" for labelName in labelNames]
|
fileNames = [labelName + "." + fileClass for labelName in labelNames]
|
||||||
# 获取数据的最大行数
|
# 获取数据的最大行数
|
||||||
max_row_length = get_max_row_len(folder, fileNames)
|
max_row_length = get_max_row_len(folder, fileNames)
|
||||||
all_features = []
|
all_features = []
|
||||||
|
|
|
||||||
8
main.py
8
main.py
|
|
@ -5,15 +5,13 @@ from Qfunctions.loaData import load_data as dLoader
|
||||||
from sklearn.decomposition import PCA
|
from sklearn.decomposition import PCA
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
projet_name = '20241009MaterialDiv'
|
projet_name = '###########' # 输入元数据文件夹名称
|
||||||
label_names =["Acrylic", "Ecoflex", "PDMS", "PLA", "Wood"]
|
label_names =[] # 请在[]内输入每一个分类的名称
|
||||||
data = dLoader(projet_name, label_names, isDir=True)
|
data = dLoader(projet_name, label_names, isDir=False)
|
||||||
X_train, X_test, y_train, y_test, encoder = divSet(
|
X_train, X_test, y_train, y_test, encoder = divSet(
|
||||||
data=data, labels=label_names, test_size= 0.3
|
data=data, labels=label_names, test_size= 0.3
|
||||||
)
|
)
|
||||||
|
|
||||||
print(y_train)
|
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
pca = PCA(n_components=2) # 保留两个主成分
|
pca = PCA(n_components=2) # 保留两个主成分
|
||||||
principalComponents = pca.fit_transform(X_train)
|
principalComponents = pca.fit_transform(X_train)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue