20260109WZSX

2026-02-28 00:44:50 +08:00 · 2026-02-28 00:44:50 +08:00 · 5da38b49b0
parent 3e07b4258f
commit 5da38b49b0
3 changed files with 274 additions and 18 deletions
--- a/Qfunctions/loaData.py
+++ b/Qfunctions/loaData.py
@ -1,4 +1,5 @@
 import os
+import unicodedata
 import pandas as pd

 STATIC_PATH = './Static'
@ -46,13 +47,34 @@ def load_from_folder(folder, labelNames, fileClass):
    return pd.concat(all_features, ignore_index=True)

 def load_from_file(folder, labelNames, fileClass):
-  fileNames = [labelName + "." + fileClass for labelName in labelNames]
-  # 获取数据的最大行数
-  max_row_length = get_max_row_len(folder, fileNames)
+  # Build the expected file names (label + "." + extension) and match them robustly against the directory (zero-width characters removed, Unicode normalization, case-insensitive).
+  expected_names = [f"{labelName}.{fileClass}" for labelName in labelNames]
+
+  actual_file_names = []
+  missing = []
+  for expected in expected_names:
+    match = _find_matching_file(folder, expected)  # on-disk entry name, or None when nothing matches
+    if match is None:
+      missing.append(expected)
+    else:
+      actual_file_names.append(match)
+
+  if missing:
+    available = sorted(os.listdir(folder))  # listed only to make the error message actionable
+    raise FileNotFoundError(
+      "The following files were not found (after normalization): "
+      + ", ".join(missing)
+      + f". Available files: {available}"
+    )
+
+  # Get the maximum row count across the data files (using the file names that were actually matched).
+  max_row_length = get_max_row_len(folder, actual_file_names)
+
  all_features = []
-  for i, fileName in enumerate(fileNames):
-      features = load_xlsx(folder + '/' + fileName, labelNames[i], max_row_length, 'zero')
-      all_features.append(features)
+  for i, fileName in enumerate(actual_file_names):  # index i lines up with labelNames[i]: any miss has already raised above
+    file_path = os.path.join(folder, fileName)
+    features = load_xlsx(file_path, labelNames[i], max_row_length, 'zero')
+    all_features.append(features)
  return pd.concat(all_features, ignore_index = True)


@ -102,10 +124,65 @@ def fill_to_len(row, length = 1000, rule = None):
    return pd.concat([row, fill_values], ignore_index=True)

 def get_max_row_len(folder, filenames):
-    max_len = 0
-    for filename in filenames:
-        df = pd.read_excel(os.path.join(folder, filename))
-        max_len = max(max_len, df.shape[0])
-    return max_len
+  max_len = 0  # running maximum; stays 0 when filenames is empty
+  for filename in filenames:
+    df = pd.read_excel(os.path.join(folder, filename))  # each file is read in full just to count its rows
+    max_len = max(max_len, df.shape[0])  # df.shape[0] is the DataFrame's row count
+  return max_len

 __all__ = ['load_data']
+
+# ---------- Internal helpers: file-name matching that tolerates zero-width characters and differing Unicode forms ----------
+
+def _strip_zero_width(s: str) -> str:
+  # Remove common zero-width characters (U+200B, U+200C, U+200D, U+FEFF); a non-str argument is returned unchanged.
+  if not isinstance(s, str):
+    return s
+  return s.translate({  # mapping a code point to None deletes it from the result
+    0x200B: None,  # ZERO WIDTH SPACE
+    0x200C: None,  # ZERO WIDTH NON-JOINER
+    0x200D: None,  # ZERO WIDTH JOINER
+    0xFEFF: None,  # ZERO WIDTH NO-BREAK SPACE
+  })
+
+def _canonicalize_name(name: str) -> str:
+  # Normalize to Unicode form NFKC, then remove zero-width characters; case and spacing are left as they are.
+  name = unicodedata.normalize('NFKC', name)
+  name = _strip_zero_width(name)
+  return name
+
+def _normalize_for_compare(name: str) -> str:
+  # Further normalization on top of _canonicalize_name, used for lenient comparison:
+  # - make case uniform (lower-cased)
+  # - treat underscores as spaces (matches file names that use underscores in place of spaces)
+  # - collapse every run of whitespace into a single space and strip leading/trailing whitespace
+  n = _canonicalize_name(name)
+  n = n.replace('_', ' ')
+  n = ' '.join(n.split())
+  return n.lower()
+
+def _find_matching_file(folder: str, expected_name: str):  # returns the entry name as listed in folder, or None if no entry matches
+  # First try a strict match (names equal after canonicalization).
+  expected = _canonicalize_name(expected_name)
+  try:
+    entries = os.listdir(folder)
+  except FileNotFoundError:
+    return None  # a missing folder is reported as "no match" instead of being raised here
+
+  for f in entries:
+    if _canonicalize_name(f) == expected:
+      return f
+
+  # Secondary strategy: case-insensitive comparison.
+  expected_lower = expected.lower()
+  for f in entries:
+    if _canonicalize_name(f).lower() == expected_lower:
+      return f
+
+  # Lenient strategy: treat underscores as spaces and collapse whitespace (used to match "Crocodile grain" vs "Crocodile_grain").
+  expected_relaxed = _normalize_for_compare(expected_name)
+  for f in entries:
+    if _normalize_for_compare(f) == expected_relaxed:
+      return f
+
+  return None
--- a/env.yml
+++ b/env.yml
@ -0,0 +1,180 @@
+name: Deeplearning
+channels:
+  - pytorch
+  - nvidia
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - blas=1.0=mkl
+  - bottleneck=1.3.7=py312ha883a20_0
+  - brotli=1.0.9=h5eee18b_8
+  - brotli-bin=1.0.9=h5eee18b_8
+  - brotli-python=1.0.9=py312h6a678d5_8
+  - bzip2=1.0.8=h5eee18b_6
+  - ca-certificates=2024.11.26=h06a4308_0
+  - certifi=2024.12.14=py312h06a4308_0
+  - charset-normalizer=3.3.2=pyhd3eb1b0_0
+  - contourpy=1.2.0=py312hdb19cb5_0
+  - cuda-cudart=12.4.127=0
+  - cuda-cupti=12.4.127=0
+  - cuda-libraries=12.4.0=0
+  - cuda-nvrtc=12.4.127=0
+  - cuda-nvtx=12.4.127=0
+  - cuda-opencl=12.4.127=0
+  - cuda-runtime=12.4.0=0
+  - cudatoolkit=11.5.1=hcf5317a_9
+  - cycler=0.11.0=pyhd3eb1b0_0
+  - cyrus-sasl=2.1.28=h52b45da_1
+  - dbus=1.13.18=hb2f20db_0
+  - debugpy=1.6.7=py312h6a678d5_0
+  - et_xmlfile=1.1.0=py312h06a4308_1
+  - expat=2.6.2=h6a678d5_0
+  - ffmpeg=4.3=hf484d3e_0
+  - filelock=3.13.1=py312h06a4308_0
+  - fontconfig=2.14.1=h55d465d_3
+  - fonttools=4.51.0=py312h5eee18b_0
+  - freetype=2.12.1=h4a9f257_0
+  - glib=2.78.4=h6a678d5_0
+  - glib-tools=2.78.4=h6a678d5_0
+  - gmp=6.2.1=h295c915_3
+  - gnutls=3.6.15=he1e5248_0
+  - gst-plugins-base=1.14.1=h6a678d5_1
+  - gstreamer=1.14.1=h5eee18b_1
+  - icu=73.1=h6a678d5_0
+  - idna=3.7=py312h06a4308_0
+  - intel-openmp=2023.1.0=hdb19cb5_46306
+  - jinja2=3.1.4=py312h06a4308_0
+  - joblib=1.4.2=py312h06a4308_0
+  - jpeg=9e=h5eee18b_3
+  - kiwisolver=1.4.4=py312h6a678d5_0
+  - krb5=1.20.1=h143b758_1
+  - lame=3.100=h7b6447c_0
+  - lcms2=2.12=h3be6417_0
+  - ld_impl_linux-64=2.38=h1181459_1
+  - lerc=3.0=h295c915_0
+  - libbrotlicommon=1.0.9=h5eee18b_8
+  - libbrotlidec=1.0.9=h5eee18b_8
+  - libbrotlienc=1.0.9=h5eee18b_8
+  - libclang=14.0.6=default_hc6dbbc7_1
+  - libclang13=14.0.6=default_he11475f_1
+  - libcublas=12.4.2.65=0
+  - libcufft=11.2.0.44=0
+  - libcufile=1.9.1.3=0
+  - libcups=2.4.2=h2d74bed_1
+  - libcurand=10.3.5.147=0
+  - libcusolver=11.6.0.99=0
+  - libcusparse=12.3.0.142=0
+  - libdeflate=1.17=h5eee18b_1
+  - libedit=3.1.20230828=h5eee18b_0
+  - libffi=3.4.4=h6a678d5_1
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgfortran-ng=11.2.0=h00389a5_1
+  - libgfortran5=11.2.0=h1234567_1
+  - libglib=2.78.4=hdc74915_0
+  - libgomp=11.2.0=h1234567_1
+  - libiconv=1.16=h5eee18b_3
+  - libidn2=2.3.4=h5eee18b_0
+  - libjpeg-turbo=2.0.0=h9bf148f_0
+  - libllvm14=14.0.6=hdb19cb5_3
+  - libnpp=12.2.5.2=0
+  - libnvfatbin=12.4.127=0
+  - libnvjitlink=12.4.99=0
+  - libnvjpeg=12.3.1.89=0
+  - libpng=1.6.39=h5eee18b_0
+  - libpq=12.17=hdbd6064_0
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - libtasn1=4.19.0=h5eee18b_0
+  - libtiff=4.5.1=h6a678d5_0
+  - libunistring=0.9.10=h27cfd23_0
+  - libuuid=1.41.5=h5eee18b_0
+  - libwebp-base=1.3.2=h5eee18b_0
+  - libxcb=1.15=h7f8727e_0
+  - libxkbcommon=1.0.1=h097e994_2
+  - libxml2=2.13.1=hfdd30dd_2
+  - llvm-openmp=14.0.6=h9e868ea_0
+  - lz4-c=1.9.4=h6a678d5_1
+  - markupsafe=2.1.3=py312h5eee18b_0
+  - matplotlib=3.8.4=py312h06a4308_0
+  - matplotlib-base=3.8.4=py312h526ad5a_0
+  - mkl=2023.1.0=h213fc3f_46344
+  - mkl-service=2.4.0=py312h5eee18b_1
+  - mkl_fft=1.3.8=py312h5eee18b_0
+  - mkl_random=1.2.4=py312hdb19cb5_0
+  - mpmath=1.3.0=py312h06a4308_0
+  - mysql=5.7.24=h721c034_2
+  - ncurses=6.4=h6a678d5_0
+  - nettle=3.7.3=hbbd107a_1
+  - networkx=3.3=py312h06a4308_0
+  - numexpr=2.8.7=py312hf827012_0
+  - numpy=1.26.4=py312hc5e2394_0
+  - numpy-base=1.26.4=py312h0da6c21_0
+  - openh264=2.1.1=h4ff587b_0
+  - openjpeg=2.5.2=he7f1fd0_0
+  - openpyxl=3.1.5=py312h5eee18b_0
+  - openssl=3.0.15=h5eee18b_0
+  - packaging=24.1=py312h06a4308_0
+  - pandas=2.2.2=py312h526ad5a_0
+  - pcre2=10.42=hebb0a14_1
+  - pillow=10.4.0=py312h5eee18b_0
+  - pip=24.2=py312h06a4308_0
+  - ply=3.11=py312h06a4308_1
+  - pybind11-abi=5=hd3eb1b0_0
+  - pyopengl=3.1.1a1=py312h06a4308_0
+  - pyparsing=3.0.9=py312h06a4308_0
+  - pyqt=5.15.10=py312h6a678d5_0
+  - pyqt5-sip=12.13.0=py312h5eee18b_0
+  - pysocks=1.7.1=py312h06a4308_0
+  - python=3.12.4=h5148396_1
+  - python-dateutil=2.9.0post0=py312h06a4308_2
+  - python-tzdata=2023.3=pyhd3eb1b0_0
+  - pytorch=2.4.0=py3.12_cuda12.4_cudnn9.1.0_0
+  - pytorch-cuda=12.4=hc786d27_6
+  - pytorch-mutex=1.0=cuda
+  - pytz=2024.1=py312h06a4308_0
+  - pyyaml=6.0.1=py312h5eee18b_0
+  - qt-main=5.15.2=h53bd1ea_10
+  - readline=8.2=h5eee18b_0
+  - requests=2.32.3=py312h06a4308_0
+  - scikit-learn=1.5.1=py312h526ad5a_0
+  - scipy=1.13.1=py312hc5e2394_0
+  - seaborn=0.13.2=py312h06a4308_0
+  - setuptools=72.1.0=py312h06a4308_0
+  - sip=6.7.12=py312h6a678d5_0
+  - six=1.16.0=pyhd3eb1b0_1
+  - sqlite=3.45.3=h5eee18b_0
+  - sympy=1.12=py312h06a4308_0
+  - tbb=2021.8.0=hdb19cb5_0
+  - threadpoolctl=3.5.0=py312he106c6f_0
+  - tk=8.6.14=h39e8969_0
+  - torchaudio=2.4.0=py312_cu124
+  - torchtriton=3.0.0=py312
+  - torchvision=0.19.0=py312_cu124
+  - tornado=6.4.1=py312h5eee18b_0
+  - tqdm=4.66.5=py312he106c6f_0
+  - typing_extensions=4.11.0=py312h06a4308_0
+  - tzdata=2024a=h04d1e81_0
+  - unicodedata2=15.1.0=py312h5eee18b_0
+  - urllib3=2.2.2=py312h06a4308_0
+  - wheel=0.43.0=py312h06a4308_0
+  - xlrd=2.0.1=pyhd3eb1b0_1
+  - xz=5.4.6=h5eee18b_1
+  - yaml=0.2.5=h7b6447c_0
+  - zlib=1.2.13=h5eee18b_1
+  - zstd=1.5.5=hc292b87_2
+  - pip:
+      - autopep8==2.3.1
+      - basedpyright==1.16.0
+      - black==24.8.0
+      - click==8.1.7
+      - fsspec==2024.6.1
+      - graphviz==0.20.3
+      - greenlet==3.0.3
+      - msgpack==1.0.8
+      - mypy-extensions==1.0.0
+      - nodejs-wheel-binaries==20.16.0
+      - pathspec==0.12.1
+      - platformdirs==4.2.2
+      - pycodestyle==2.12.1
+      - pynvim==0.5.0
+prefix: /home/qyhhh/.miniconda3/envs/Deeplearning
--- a/main.py
+++ b/main.py
@ -5,13 +5,12 @@ from Qfunctions.saveToxlsx import save_to_xlsx as save_to_xlsx

 def main():
  # 输入元数据文件夹名称
-  projet_name = '20251214 WZSX'                                                                
+  projet_name = '20260109WZSX'                                                                
  # 请在[]内输入每一个分类的名称
-  label_names = ['canvas', 'lambswool', 
-                 'lychee_grain', 'non-woven_fabric', 'nylon',
-                 'PDMS', 'PET', 'PTFE', 'pure_cotton', 'ramie',
-                 'silk_cotton', 'suede'
-                 ]
+  label_names = [ 
+                  'Crocodile grain', 'Litchi grain','Pin grain', 
+                  'Mohair tweed', 'Polar fleece', 'Berber fleece'
+                ]
  print(label_names)
  data = load_data(projet_name, label_names, isDir=False, fileClass='xlsx')
  X_train, X_test, y_train, y_test, encoder = divSet(
@ -20,7 +19,7 @@ def main():
  
  model = Qmlp(
    X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test,
-    hidden_layers = [1024, 512, 256],
+    hidden_layers = [256, 256, 256],
    dropout_rate=0      
    )