20260109WZSX

This commit is contained in:
qyh1510@gmail.com 2026-02-28 00:44:50 +08:00
parent 3e07b4258f
commit 5da38b49b0
3 changed files with 274 additions and 18 deletions

View File

@ -1,4 +1,5 @@
import os
import unicodedata
import pandas as pd
STATIC_PATH = './Static'
@ -46,13 +47,34 @@ def load_from_folder(folder, labelNames, fileClass):
return pd.concat(all_features, ignore_index=True)
def load_from_file(folder, labelNames, fileClass):
    """Load one spreadsheet per label from *folder* and stack the results.

    For every label the expected file name is ``"<label>.<fileClass>"``.
    Each expected name is resolved against the directory listing with
    ``_find_matching_file`` so that the name actually present on disk is
    used for all subsequent reads.

    Args:
        folder: Directory that contains the per-label files.
        labelNames: Sequence of label names; one file is expected per label.
        fileClass: File extension without the leading dot (e.g. ``'xlsx'``).

    Returns:
        The result of ``pd.concat(..., ignore_index=True)`` over the
        per-file outputs of ``load_xlsx``.

    Raises:
        FileNotFoundError: If at least one expected file cannot be matched.
            The message lists the missing names and the directory content.
            (If *folder* itself does not exist, the ``os.listdir`` call in
            the error path raises ``FileNotFoundError`` as well.)
    """
    # Build the expected file names (label + "." + extension) and match them
    # robustly against the directory (zero-width characters removed, Unicode
    # normalization, case-insensitive).
    expected_names = [f"{labelName}.{fileClass}" for labelName in labelNames]
    actual_file_names = [
        _find_matching_file(folder, expected) for expected in expected_names
    ]

    missing = [
        expected
        for expected, match in zip(expected_names, actual_file_names)
        if match is None
    ]
    if missing:
        available = sorted(os.listdir(folder))
        raise FileNotFoundError(
            "The following files were not found (after normalization): "
            + ", ".join(missing)
            + f". Available files: {available}"
        )

    # Maximum row count over all files, computed from the names that were
    # actually matched on disk rather than from the raw expected names.
    max_row_length = get_max_row_len(folder, actual_file_names)

    # Every file is loaded exactly once with its own label.
    # NOTE(review): 'zero' is presumably the padding rule used to bring each
    # file up to max_row_length - confirm against load_xlsx.
    all_features = [
        load_xlsx(os.path.join(folder, fileName), labelName, max_row_length, 'zero')
        for labelName, fileName in zip(labelNames, actual_file_names)
    ]
    return pd.concat(all_features, ignore_index=True)
@ -102,10 +124,65 @@ def fill_to_len(row, length = 1000, rule = None):
return pd.concat([row, fill_values], ignore_index=True)
def get_max_row_len(folder, filenames):
    """Return the largest row count among the given Excel files.

    Args:
        folder: Directory that contains the files.
        filenames: Iterable of file names relative to *folder*.

    Returns:
        The maximum of ``DataFrame.shape[0]`` over all files as read by
        ``pd.read_excel``, or ``0`` when *filenames* is empty.
    """
    row_counts = (
        pd.read_excel(os.path.join(folder, filename)).shape[0]
        for filename in filenames
    )
    # default=0 keeps the "no files -> 0" result of the accumulator loop.
    return max(row_counts, default=0)
# Public API: `from <module> import *` exports only `load_data`.
# NOTE(review): `load_data` is not defined in the visible part of this file - confirm it exists elsewhere in the module.
__all__ = ['load_data']
# ---------- Internal helpers: match file names that contain zero-width characters or use a different Unicode form ----------
def _strip_zero_width(s: str) -> str:
    """Return *s* with invisible zero-width code points removed.

    Stripped code points: U+200B, U+200C, U+200D, U+2060 and U+FEFF.
    U+2060 (WORD JOINER) is included because it is the designated
    replacement for the zero-width-no-break-space use of U+FEFF and is not
    removed by NFKC normalization, so it would otherwise still break
    file-name comparison.

    A non-string argument is returned unchanged.
    """
    if not isinstance(s, str):
        return s
    # Mapping a code point to None makes str.translate delete it.
    return s.translate({
        0x200B: None,  # ZERO WIDTH SPACE
        0x200C: None,  # ZERO WIDTH NON-JOINER
        0x200D: None,  # ZERO WIDTH JOINER
        0x2060: None,  # WORD JOINER
        0xFEFF: None,  # ZERO WIDTH NO-BREAK SPACE (BOM)
    })
def _canonicalize_name(name: str) -> str:
    """Return the canonical form of *name* used for strict comparison.

    The name is first normalized to Unicode form NFKC and the result is
    then passed through ``_strip_zero_width``.
    """
    return _strip_zero_width(unicodedata.normalize('NFKC', name))
def _normalize_for_compare(name: str) -> str:
    """Return a relaxed comparison key for *name*.

    On top of ``_canonicalize_name`` this:
      - treats underscores as spaces (so a file that uses "_" in place of
        a space still matches),
      - collapses every run of whitespace into one space and trims both
        ends,
      - lower-cases the result.
    """
    canonical = _canonicalize_name(name)
    words = canonical.replace('_', ' ').split()
    return ' '.join(words).lower()
def _find_matching_file(folder: str, expected_name: str):
    """Return the directory entry of *folder* that best matches *expected_name*.

    Three passes are tried in order, each over the whole directory:

    1. strict    - names are equal after ``_canonicalize_name``;
    2. caseless  - canonical names are equal after ``str.casefold``;
    3. relaxed   - names are equal after ``_normalize_for_compare``
                   (e.g. "Crocodile grain" vs "Crocodile_grain").

    Args:
        folder: Directory to search.
        expected_name: File name (with extension) that is being looked for.

    Returns:
        The entry name exactly as it appears in the directory listing, or
        ``None`` when nothing matches or *folder* is missing / not a
        directory.
    """
    expected = _canonicalize_name(expected_name)
    try:
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # winner reproducible when several entries qualify in the same pass.
        entries = sorted(os.listdir(folder))
    except (FileNotFoundError, NotADirectoryError):
        return None

    # Canonicalize each entry once and reuse it for the first two passes.
    canonical = [(entry, _canonicalize_name(entry)) for entry in entries]

    # Pass 1: strict match (equal after canonicalization).
    for entry, name in canonical:
        if name == expected:
            return entry

    # Pass 2: case-insensitive match. casefold() is the caseless-matching
    # primitive and also covers characters that lower() leaves distinct.
    expected_folded = expected.casefold()
    for entry, name in canonical:
        if name.casefold() == expected_folded:
            return entry

    # Pass 3: relaxed match (underscores as spaces, collapsed whitespace).
    expected_relaxed = _normalize_for_compare(expected_name)
    for entry in entries:
        if _normalize_for_compare(entry) == expected_relaxed:
            return entry

    return None

180
env.yml Normal file
View File

@ -0,0 +1,180 @@
name: Deeplearning
channels:
- pytorch
- nvidia
- defaults
dependencies:
- _libgcc_mutex=0.1=main
- _openmp_mutex=5.1=1_gnu
- blas=1.0=mkl
- bottleneck=1.3.7=py312ha883a20_0
- brotli=1.0.9=h5eee18b_8
- brotli-bin=1.0.9=h5eee18b_8
- brotli-python=1.0.9=py312h6a678d5_8
- bzip2=1.0.8=h5eee18b_6
- ca-certificates=2024.11.26=h06a4308_0
- certifi=2024.12.14=py312h06a4308_0
- charset-normalizer=3.3.2=pyhd3eb1b0_0
- contourpy=1.2.0=py312hdb19cb5_0
- cuda-cudart=12.4.127=0
- cuda-cupti=12.4.127=0
- cuda-libraries=12.4.0=0
- cuda-nvrtc=12.4.127=0
- cuda-nvtx=12.4.127=0
- cuda-opencl=12.4.127=0
- cuda-runtime=12.4.0=0
- cudatoolkit=11.5.1=hcf5317a_9
- cycler=0.11.0=pyhd3eb1b0_0
- cyrus-sasl=2.1.28=h52b45da_1
- dbus=1.13.18=hb2f20db_0
- debugpy=1.6.7=py312h6a678d5_0
- et_xmlfile=1.1.0=py312h06a4308_1
- expat=2.6.2=h6a678d5_0
- ffmpeg=4.3=hf484d3e_0
- filelock=3.13.1=py312h06a4308_0
- fontconfig=2.14.1=h55d465d_3
- fonttools=4.51.0=py312h5eee18b_0
- freetype=2.12.1=h4a9f257_0
- glib=2.78.4=h6a678d5_0
- glib-tools=2.78.4=h6a678d5_0
- gmp=6.2.1=h295c915_3
- gnutls=3.6.15=he1e5248_0
- gst-plugins-base=1.14.1=h6a678d5_1
- gstreamer=1.14.1=h5eee18b_1
- icu=73.1=h6a678d5_0
- idna=3.7=py312h06a4308_0
- intel-openmp=2023.1.0=hdb19cb5_46306
- jinja2=3.1.4=py312h06a4308_0
- joblib=1.4.2=py312h06a4308_0
- jpeg=9e=h5eee18b_3
- kiwisolver=1.4.4=py312h6a678d5_0
- krb5=1.20.1=h143b758_1
- lame=3.100=h7b6447c_0
- lcms2=2.12=h3be6417_0
- ld_impl_linux-64=2.38=h1181459_1
- lerc=3.0=h295c915_0
- libbrotlicommon=1.0.9=h5eee18b_8
- libbrotlidec=1.0.9=h5eee18b_8
- libbrotlienc=1.0.9=h5eee18b_8
- libclang=14.0.6=default_hc6dbbc7_1
- libclang13=14.0.6=default_he11475f_1
- libcublas=12.4.2.65=0
- libcufft=11.2.0.44=0
- libcufile=1.9.1.3=0
- libcups=2.4.2=h2d74bed_1
- libcurand=10.3.5.147=0
- libcusolver=11.6.0.99=0
- libcusparse=12.3.0.142=0
- libdeflate=1.17=h5eee18b_1
- libedit=3.1.20230828=h5eee18b_0
- libffi=3.4.4=h6a678d5_1
- libgcc-ng=11.2.0=h1234567_1
- libgfortran-ng=11.2.0=h00389a5_1
- libgfortran5=11.2.0=h1234567_1
- libglib=2.78.4=hdc74915_0
- libgomp=11.2.0=h1234567_1
- libiconv=1.16=h5eee18b_3
- libidn2=2.3.4=h5eee18b_0
- libjpeg-turbo=2.0.0=h9bf148f_0
- libllvm14=14.0.6=hdb19cb5_3
- libnpp=12.2.5.2=0
- libnvfatbin=12.4.127=0
- libnvjitlink=12.4.99=0
- libnvjpeg=12.3.1.89=0
- libpng=1.6.39=h5eee18b_0
- libpq=12.17=hdbd6064_0
- libstdcxx-ng=11.2.0=h1234567_1
- libtasn1=4.19.0=h5eee18b_0
- libtiff=4.5.1=h6a678d5_0
- libunistring=0.9.10=h27cfd23_0
- libuuid=1.41.5=h5eee18b_0
- libwebp-base=1.3.2=h5eee18b_0
- libxcb=1.15=h7f8727e_0
- libxkbcommon=1.0.1=h097e994_2
- libxml2=2.13.1=hfdd30dd_2
- llvm-openmp=14.0.6=h9e868ea_0
- lz4-c=1.9.4=h6a678d5_1
- markupsafe=2.1.3=py312h5eee18b_0
- matplotlib=3.8.4=py312h06a4308_0
- matplotlib-base=3.8.4=py312h526ad5a_0
- mkl=2023.1.0=h213fc3f_46344
- mkl-service=2.4.0=py312h5eee18b_1
- mkl_fft=1.3.8=py312h5eee18b_0
- mkl_random=1.2.4=py312hdb19cb5_0
- mpmath=1.3.0=py312h06a4308_0
- mysql=5.7.24=h721c034_2
- ncurses=6.4=h6a678d5_0
- nettle=3.7.3=hbbd107a_1
- networkx=3.3=py312h06a4308_0
- numexpr=2.8.7=py312hf827012_0
- numpy=1.26.4=py312hc5e2394_0
- numpy-base=1.26.4=py312h0da6c21_0
- openh264=2.1.1=h4ff587b_0
- openjpeg=2.5.2=he7f1fd0_0
- openpyxl=3.1.5=py312h5eee18b_0
- openssl=3.0.15=h5eee18b_0
- packaging=24.1=py312h06a4308_0
- pandas=2.2.2=py312h526ad5a_0
- pcre2=10.42=hebb0a14_1
- pillow=10.4.0=py312h5eee18b_0
- pip=24.2=py312h06a4308_0
- ply=3.11=py312h06a4308_1
- pybind11-abi=5=hd3eb1b0_0
- pyopengl=3.1.1a1=py312h06a4308_0
- pyparsing=3.0.9=py312h06a4308_0
- pyqt=5.15.10=py312h6a678d5_0
- pyqt5-sip=12.13.0=py312h5eee18b_0
- pysocks=1.7.1=py312h06a4308_0
- python=3.12.4=h5148396_1
- python-dateutil=2.9.0post0=py312h06a4308_2
- python-tzdata=2023.3=pyhd3eb1b0_0
- pytorch=2.4.0=py3.12_cuda12.4_cudnn9.1.0_0
- pytorch-cuda=12.4=hc786d27_6
- pytorch-mutex=1.0=cuda
- pytz=2024.1=py312h06a4308_0
- pyyaml=6.0.1=py312h5eee18b_0
- qt-main=5.15.2=h53bd1ea_10
- readline=8.2=h5eee18b_0
- requests=2.32.3=py312h06a4308_0
- scikit-learn=1.5.1=py312h526ad5a_0
- scipy=1.13.1=py312hc5e2394_0
- seaborn=0.13.2=py312h06a4308_0
- setuptools=72.1.0=py312h06a4308_0
- sip=6.7.12=py312h6a678d5_0
- six=1.16.0=pyhd3eb1b0_1
- sqlite=3.45.3=h5eee18b_0
- sympy=1.12=py312h06a4308_0
- tbb=2021.8.0=hdb19cb5_0
- threadpoolctl=3.5.0=py312he106c6f_0
- tk=8.6.14=h39e8969_0
- torchaudio=2.4.0=py312_cu124
- torchtriton=3.0.0=py312
- torchvision=0.19.0=py312_cu124
- tornado=6.4.1=py312h5eee18b_0
- tqdm=4.66.5=py312he106c6f_0
- typing_extensions=4.11.0=py312h06a4308_0
- tzdata=2024a=h04d1e81_0
- unicodedata2=15.1.0=py312h5eee18b_0
- urllib3=2.2.2=py312h06a4308_0
- wheel=0.43.0=py312h06a4308_0
- xlrd=2.0.1=pyhd3eb1b0_1
- xz=5.4.6=h5eee18b_1
- yaml=0.2.5=h7b6447c_0
- zlib=1.2.13=h5eee18b_1
- zstd=1.5.5=hc292b87_2
- pip:
- autopep8==2.3.1
- basedpyright==1.16.0
- black==24.8.0
- click==8.1.7
- fsspec==2024.6.1
- graphviz==0.20.3
- greenlet==3.0.3
- msgpack==1.0.8
- mypy-extensions==1.0.0
- nodejs-wheel-binaries==20.16.0
- pathspec==0.12.1
- platformdirs==4.2.2
- pycodestyle==2.12.1
- pynvim==0.5.0
prefix: /home/qyhhh/.miniconda3/envs/Deeplearning

13
main.py
View File

@ -5,13 +5,12 @@ from Qfunctions.saveToxlsx import save_to_xlsx as save_to_xlsx
def main():
# 输入元数据文件夹名称
projet_name = '20251214 WZSX'
projet_name = '20260109WZSX'
# 请在[]内输入每一个分类的名称
label_names = ['canvas', 'lambswool',
'lychee_grain', 'non-woven_fabric', 'nylon',
'PDMS', 'PET', 'PTFE', 'pure_cotton', 'ramie',
'silk_cotton', 'suede'
]
label_names = [
'Crocodile grain', 'Litchi grain','Pin grain',
'Mohair tweed', 'Polar fleece', 'Berber fleece'
]
print(label_names)
data = load_data(projet_name, label_names, isDir=False, fileClass='xlsx')
X_train, X_test, y_train, y_test, encoder = divSet(
@ -20,7 +19,7 @@ def main():
model = Qmlp(
X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test,
hidden_layers = [1024, 512, 256],
hidden_layers = [256, 256, 256],
dropout_rate=0
)