From 5da38b49b0fbfa6fe2af8e184948025cc83c18be Mon Sep 17 00:00:00 2001
From: "qyh1510@gmail.com"
Date: Sat, 28 Feb 2026 00:44:50 +0800
Subject: [PATCH] 20260109WZSX

---
  Reviewer notes. This area, between the three-dash separator and the first
  file diff, is commentary and is not part of the commit.

  * Layout: this patch reached review with its line breaks and indentation
    collapsed. The line breaks below were restored from the hunk headers;
    every hunk's old/new line counts and the diffstat totals (274
    insertions, 18 deletions) agree with this layout. Indentation and
    blank context lines are inferred from Python/YAML syntax and from
    those counts -- TODO confirm against the repository, or regenerate
    with git format-patch, before applying. The payload text itself
    (code, comments, strings) is unchanged.

  * Qfunctions/loaData.py, load_from_file: each expected name
    "<label>.<fileClass>" is resolved through _find_matching_file.
    Unresolved names are collected and reported in one FileNotFoundError.
    The load loop pairs labelNames[i] with the i-th matched file; that
    stays aligned because the loop is only reached when nothing is missing.

  * Qfunctions/loaData.py, _find_matching_file: three passes over the
    directory entries, first hit wins.
      1. equality after NFKC normalisation and zero-width stripping
      2. the same comparison, lower-cased
      3. underscores treated as spaces, whitespace folded, lower-cased
    Returns None when nothing matches.
    NOTE(review): it also returns None when os.listdir(folder) raises
    FileNotFoundError, but load_from_file then calls os.listdir(folder)
    again, unguarded, while building its own error message.
    NOTE(review): the directory is listed once per expected name.
    NOTE(review): nothing prevents two labels from resolving to the same
    file.

  * Qfunctions/loaData.py, get_max_row_len hunk: the removed and added
    lines are textually identical as received; presumably a
    whitespace-only change -- TODO confirm.

  * env.yml: the prefix entry is an absolute path under /home/qyhhh.
    NOTE(review): cudatoolkit=11.5.1 is pinned alongside
    cuda-runtime=12.4.0 and pytorch-cuda=12.4 -- confirm both are intended.

  * main.py: the label names now contain spaces (for example
    'Crocodile grain'); pass 3 of _find_matching_file is what lets them
    match file names written with underscores.

 Qfunctions/loaData.py |  99 ++++++++++++++++++++---
 env.yml               | 180 ++++++++++++++++++++++++++++++++++++++++++
 main.py               |  13 ++-
 3 files changed, 274 insertions(+), 18 deletions(-)
 create mode 100644 env.yml

diff --git a/Qfunctions/loaData.py b/Qfunctions/loaData.py
index 75abac6..99f92b3 100644
--- a/Qfunctions/loaData.py
+++ b/Qfunctions/loaData.py
@@ -1,4 +1,5 @@
 import os
+import unicodedata
 import pandas as pd
 
 STATIC_PATH = './Static'
@@ -46,13 +47,34 @@ def load_from_folder(folder, labelNames, fileClass):
     return pd.concat(all_features, ignore_index=True)
 
 def load_from_file(folder, labelNames, fileClass):
-    fileNames = [labelName + "." + fileClass for labelName in labelNames]
-    # 获取数据的最大行数
-    max_row_length = get_max_row_len(folder, fileNames)
+    # 构建期望的文件名(label + .扩展名),并在目录中进行健壮匹配(去除零宽字符、Unicode 规范化、大小写不敏感)
+    expected_names = [f"{labelName}.{fileClass}" for labelName in labelNames]
+
+    actual_file_names = []
+    missing = []
+    for expected in expected_names:
+        match = _find_matching_file(folder, expected)
+        if match is None:
+            missing.append(expected)
+        else:
+            actual_file_names.append(match)
+
+    if missing:
+        available = sorted(os.listdir(folder))
+        raise FileNotFoundError(
+            "The following files were not found (after normalization): "
+            + ", ".join(missing)
+            + f". Available files: {available}"
+        )
+
+    # 获取数据的最大行数(使用实际匹配到的文件名)
+    max_row_length = get_max_row_len(folder, actual_file_names)
+
     all_features = []
-    for i, fileName in enumerate(fileNames):
-        features = load_xlsx(folder + '/' + fileName, labelNames[i], max_row_length, 'zero')
-        all_features.append(features)
+    for i, fileName in enumerate(actual_file_names):
+        file_path = os.path.join(folder, fileName)
+        features = load_xlsx(file_path, labelNames[i], max_row_length, 'zero')
+        all_features.append(features)
 
     return pd.concat(all_features, ignore_index = True)
 
@@ -102,10 +124,65 @@ def fill_to_len(row, length = 1000, rule = None):
     return pd.concat([row, fill_values], ignore_index=True)
 
 def get_max_row_len(folder, filenames):
-    max_len = 0
-    for filename in filenames:
-        df = pd.read_excel(os.path.join(folder, filename))
-        max_len = max(max_len, df.shape[0])
-    return max_len
+    max_len = 0
+    for filename in filenames:
+        df = pd.read_excel(os.path.join(folder, filename))
+        max_len = max(max_len, df.shape[0])
+    return max_len
 
 __all__ = ['load_data']
+
+# ---------- 内部工具函数:处理包含零宽字符或不同 Unicode 形式的文件名匹配 ----------
+
+def _strip_zero_width(s: str) -> str:
+    # 移除常见零宽字符:U+200B, U+200C, U+200D, U+FEFF
+    if not isinstance(s, str):
+        return s
+    return s.translate({
+        0x200B: None,  # ZERO WIDTH SPACE
+        0x200C: None,  # ZERO WIDTH NON-JOINER
+        0x200D: None,  # ZERO WIDTH JOINER
+        0xFEFF: None,  # ZERO WIDTH NO-BREAK SPACE
+    })
+
+def _canonicalize_name(name: str) -> str:
+    # 规范化到 NFKC,并移除零宽字符
+    name = unicodedata.normalize('NFKC', name)
+    name = _strip_zero_width(name)
+    return name
+
+def _normalize_for_compare(name: str) -> str:
+    # 进一步规范化用于宽松比较:
+    # - 统一大小写
+    # - 将下划线视为空格(与文件名用下划线代替空格的情况匹配)
+    # - 折叠所有空白为一个空格,并去除首尾空格
+    n = _canonicalize_name(name)
+    n = n.replace('_', ' ')
+    n = ' '.join(n.split())
+    return n.lower()
+
+def _find_matching_file(folder: str, expected_name: str):
+    # 首先进行严格匹配(规范化后相等)
+    expected = _canonicalize_name(expected_name)
+    try:
+        entries = os.listdir(folder)
+    except FileNotFoundError:
+        return None
+
+    for f in entries:
+        if _canonicalize_name(f) == expected:
+            return f
+
+    # 次要策略:大小写不敏感比较
+    expected_lower = expected.lower()
+    for f in entries:
+        if _canonicalize_name(f).lower() == expected_lower:
+            return f
+
+    # 宽松策略:将下划线当作空格处理,并折叠空白(用于匹配 "Crocodile grain" vs "Crocodile_grain")
+    expected_relaxed = _normalize_for_compare(expected_name)
+    for f in entries:
+        if _normalize_for_compare(f) == expected_relaxed:
+            return f
+
+    return None
diff --git a/env.yml b/env.yml
new file mode 100644
index 0000000..fa3c78f
--- /dev/null
+++ b/env.yml
@@ -0,0 +1,180 @@
+name: Deeplearning
+channels:
+  - pytorch
+  - nvidia
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - blas=1.0=mkl
+  - bottleneck=1.3.7=py312ha883a20_0
+  - brotli=1.0.9=h5eee18b_8
+  - brotli-bin=1.0.9=h5eee18b_8
+  - brotli-python=1.0.9=py312h6a678d5_8
+  - bzip2=1.0.8=h5eee18b_6
+  - ca-certificates=2024.11.26=h06a4308_0
+  - certifi=2024.12.14=py312h06a4308_0
+  - charset-normalizer=3.3.2=pyhd3eb1b0_0
+  - contourpy=1.2.0=py312hdb19cb5_0
+  - cuda-cudart=12.4.127=0
+  - cuda-cupti=12.4.127=0
+  - cuda-libraries=12.4.0=0
+  - cuda-nvrtc=12.4.127=0
+  - cuda-nvtx=12.4.127=0
+  - cuda-opencl=12.4.127=0
+  - cuda-runtime=12.4.0=0
+  - cudatoolkit=11.5.1=hcf5317a_9
+  - cycler=0.11.0=pyhd3eb1b0_0
+  - cyrus-sasl=2.1.28=h52b45da_1
+  - dbus=1.13.18=hb2f20db_0
+  - debugpy=1.6.7=py312h6a678d5_0
+  - et_xmlfile=1.1.0=py312h06a4308_1
+  - expat=2.6.2=h6a678d5_0
+  - ffmpeg=4.3=hf484d3e_0
+  - filelock=3.13.1=py312h06a4308_0
+  - fontconfig=2.14.1=h55d465d_3
+  - fonttools=4.51.0=py312h5eee18b_0
+  - freetype=2.12.1=h4a9f257_0
+  - glib=2.78.4=h6a678d5_0
+  - glib-tools=2.78.4=h6a678d5_0
+  - gmp=6.2.1=h295c915_3
+  - gnutls=3.6.15=he1e5248_0
+  - gst-plugins-base=1.14.1=h6a678d5_1
+  - gstreamer=1.14.1=h5eee18b_1
+  - icu=73.1=h6a678d5_0
+  - idna=3.7=py312h06a4308_0
+  - intel-openmp=2023.1.0=hdb19cb5_46306
+  - jinja2=3.1.4=py312h06a4308_0
+  - joblib=1.4.2=py312h06a4308_0
+  - jpeg=9e=h5eee18b_3
+  - kiwisolver=1.4.4=py312h6a678d5_0
+  - krb5=1.20.1=h143b758_1
+  - lame=3.100=h7b6447c_0
+  - lcms2=2.12=h3be6417_0
+  - ld_impl_linux-64=2.38=h1181459_1
+  - lerc=3.0=h295c915_0
+  - libbrotlicommon=1.0.9=h5eee18b_8
+  - libbrotlidec=1.0.9=h5eee18b_8
+  - libbrotlienc=1.0.9=h5eee18b_8
+  - libclang=14.0.6=default_hc6dbbc7_1
+  - libclang13=14.0.6=default_he11475f_1
+  - libcublas=12.4.2.65=0
+  - libcufft=11.2.0.44=0
+  - libcufile=1.9.1.3=0
+  - libcups=2.4.2=h2d74bed_1
+  - libcurand=10.3.5.147=0
+  - libcusolver=11.6.0.99=0
+  - libcusparse=12.3.0.142=0
+  - libdeflate=1.17=h5eee18b_1
+  - libedit=3.1.20230828=h5eee18b_0
+  - libffi=3.4.4=h6a678d5_1
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgfortran-ng=11.2.0=h00389a5_1
+  - libgfortran5=11.2.0=h1234567_1
+  - libglib=2.78.4=hdc74915_0
+  - libgomp=11.2.0=h1234567_1
+  - libiconv=1.16=h5eee18b_3
+  - libidn2=2.3.4=h5eee18b_0
+  - libjpeg-turbo=2.0.0=h9bf148f_0
+  - libllvm14=14.0.6=hdb19cb5_3
+  - libnpp=12.2.5.2=0
+  - libnvfatbin=12.4.127=0
+  - libnvjitlink=12.4.99=0
+  - libnvjpeg=12.3.1.89=0
+  - libpng=1.6.39=h5eee18b_0
+  - libpq=12.17=hdbd6064_0
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - libtasn1=4.19.0=h5eee18b_0
+  - libtiff=4.5.1=h6a678d5_0
+  - libunistring=0.9.10=h27cfd23_0
+  - libuuid=1.41.5=h5eee18b_0
+  - libwebp-base=1.3.2=h5eee18b_0
+  - libxcb=1.15=h7f8727e_0
+  - libxkbcommon=1.0.1=h097e994_2
+  - libxml2=2.13.1=hfdd30dd_2
+  - llvm-openmp=14.0.6=h9e868ea_0
+  - lz4-c=1.9.4=h6a678d5_1
+  - markupsafe=2.1.3=py312h5eee18b_0
+  - matplotlib=3.8.4=py312h06a4308_0
+  - matplotlib-base=3.8.4=py312h526ad5a_0
+  - mkl=2023.1.0=h213fc3f_46344
+  - mkl-service=2.4.0=py312h5eee18b_1
+  - mkl_fft=1.3.8=py312h5eee18b_0
+  - mkl_random=1.2.4=py312hdb19cb5_0
+  - mpmath=1.3.0=py312h06a4308_0
+  - mysql=5.7.24=h721c034_2
+  - ncurses=6.4=h6a678d5_0
+  - nettle=3.7.3=hbbd107a_1
+  - networkx=3.3=py312h06a4308_0
+  - numexpr=2.8.7=py312hf827012_0
+  - numpy=1.26.4=py312hc5e2394_0
+  - numpy-base=1.26.4=py312h0da6c21_0
+  - openh264=2.1.1=h4ff587b_0
+  - openjpeg=2.5.2=he7f1fd0_0
+  - openpyxl=3.1.5=py312h5eee18b_0
+  - openssl=3.0.15=h5eee18b_0
+  - packaging=24.1=py312h06a4308_0
+  - pandas=2.2.2=py312h526ad5a_0
+  - pcre2=10.42=hebb0a14_1
+  - pillow=10.4.0=py312h5eee18b_0
+  - pip=24.2=py312h06a4308_0
+  - ply=3.11=py312h06a4308_1
+  - pybind11-abi=5=hd3eb1b0_0
+  - pyopengl=3.1.1a1=py312h06a4308_0
+  - pyparsing=3.0.9=py312h06a4308_0
+  - pyqt=5.15.10=py312h6a678d5_0
+  - pyqt5-sip=12.13.0=py312h5eee18b_0
+  - pysocks=1.7.1=py312h06a4308_0
+  - python=3.12.4=h5148396_1
+  - python-dateutil=2.9.0post0=py312h06a4308_2
+  - python-tzdata=2023.3=pyhd3eb1b0_0
+  - pytorch=2.4.0=py3.12_cuda12.4_cudnn9.1.0_0
+  - pytorch-cuda=12.4=hc786d27_6
+  - pytorch-mutex=1.0=cuda
+  - pytz=2024.1=py312h06a4308_0
+  - pyyaml=6.0.1=py312h5eee18b_0
+  - qt-main=5.15.2=h53bd1ea_10
+  - readline=8.2=h5eee18b_0
+  - requests=2.32.3=py312h06a4308_0
+  - scikit-learn=1.5.1=py312h526ad5a_0
+  - scipy=1.13.1=py312hc5e2394_0
+  - seaborn=0.13.2=py312h06a4308_0
+  - setuptools=72.1.0=py312h06a4308_0
+  - sip=6.7.12=py312h6a678d5_0
+  - six=1.16.0=pyhd3eb1b0_1
+  - sqlite=3.45.3=h5eee18b_0
+  - sympy=1.12=py312h06a4308_0
+  - tbb=2021.8.0=hdb19cb5_0
+  - threadpoolctl=3.5.0=py312he106c6f_0
+  - tk=8.6.14=h39e8969_0
+  - torchaudio=2.4.0=py312_cu124
+  - torchtriton=3.0.0=py312
+  - torchvision=0.19.0=py312_cu124
+  - tornado=6.4.1=py312h5eee18b_0
+  - tqdm=4.66.5=py312he106c6f_0
+  - typing_extensions=4.11.0=py312h06a4308_0
+  - tzdata=2024a=h04d1e81_0
+  - unicodedata2=15.1.0=py312h5eee18b_0
+  - urllib3=2.2.2=py312h06a4308_0
+  - wheel=0.43.0=py312h06a4308_0
+  - xlrd=2.0.1=pyhd3eb1b0_1
+  - xz=5.4.6=h5eee18b_1
+  - yaml=0.2.5=h7b6447c_0
+  - zlib=1.2.13=h5eee18b_1
+  - zstd=1.5.5=hc292b87_2
+  - pip:
+    - autopep8==2.3.1
+    - basedpyright==1.16.0
+    - black==24.8.0
+    - click==8.1.7
+    - fsspec==2024.6.1
+    - graphviz==0.20.3
+    - greenlet==3.0.3
+    - msgpack==1.0.8
+    - mypy-extensions==1.0.0
+    - nodejs-wheel-binaries==20.16.0
+    - pathspec==0.12.1
+    - platformdirs==4.2.2
+    - pycodestyle==2.12.1
+    - pynvim==0.5.0
+prefix: /home/qyhhh/.miniconda3/envs/Deeplearning
diff --git a/main.py b/main.py
index 658341a..d8b2223 100644
--- a/main.py
+++ b/main.py
@@ -5,13 +5,12 @@ from Qfunctions.saveToxlsx import save_to_xlsx as save_to_xlsx
 
 def main():
     # 输入元数据文件夹名称
-    projet_name = '20251214 WZSX'
+    projet_name = '20260109WZSX'
     # 请在[]内输入每一个分类的名称
-    label_names = ['canvas', 'lambswool',
-                   'lychee_grain', 'non-woven_fabric', 'nylon',
-                   'PDMS', 'PET', 'PTFE', 'pure_cotton', 'ramie',
-                   'silk_cotton', 'suede'
-                   ]
+    label_names = [
+        'Crocodile grain', 'Litchi grain','Pin grain',
+        'Mohair tweed', 'Polar fleece', 'Berber fleece'
+    ]
     print(label_names)
     data = load_data(projet_name, label_names, isDir=False, fileClass='xlsx')
     X_train, X_test, y_train, y_test, encoder = divSet(
@@ -20,7 +19,7 @@ def main():
 
     model = Qmlp(
         X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test,
-        hidden_layers = [1024, 512, 256],
+        hidden_layers = [256, 256, 256],
         dropout_rate=0
     )
 