20260109WZSX
This commit is contained in:
parent
3e07b4258f
commit
5da38b49b0
|
|
@ -1,4 +1,5 @@
|
|||
import os
|
||||
import unicodedata
|
||||
import pandas as pd
|
||||
|
||||
STATIC_PATH = './Static'
|
||||
|
|
@ -46,12 +47,33 @@ def load_from_folder(folder, labelNames, fileClass):
|
|||
return pd.concat(all_features, ignore_index=True)
|
||||
|
||||
def load_from_file(folder, labelNames, fileClass):
    """Load one data file per label from ``folder`` and concatenate the results.

    The expected file name for each label is ``<label>.<fileClass>``. Every
    expected name is resolved against the directory listing through
    ``_find_matching_file``, so the name actually used is the one found on
    disk, which may differ from the expected spelling.

    Args:
        folder: Directory that holds the data files.
        labelNames: Label names; each one doubles as the base name of its file.
        fileClass: File extension without the leading dot (e.g. ``'xlsx'``).

    Returns:
        The result of ``pd.concat(..., ignore_index=True)`` over the per-file
        outputs of ``load_xlsx``.

    Raises:
        FileNotFoundError: If at least one expected file has no match in
            ``folder``. The message lists the missing names and the
            directory contents.
    """
    expected_names = [f"{labelName}.{fileClass}" for labelName in labelNames]

    # Resolve every expected name first so that all missing files are
    # reported in a single error instead of failing on the first one.
    actual_file_names = []
    missing = []
    for expected in expected_names:
        match = _find_matching_file(folder, expected)
        if match is None:
            missing.append(expected)
        else:
            actual_file_names.append(match)

    if missing:
        available = sorted(os.listdir(folder))
        raise FileNotFoundError(
            "The following files were not found (after normalization): "
            + ", ".join(missing)
            + f". Available files: {available}"
        )

    # Use the names as they exist on disk, not the expected spellings.
    max_row_length = get_max_row_len(folder, actual_file_names)

    all_features = []
    # Nothing is missing at this point, so labels and matched files line up
    # one-to-one and can be paired directly.
    for labelName, fileName in zip(labelNames, actual_file_names):
        file_path = os.path.join(folder, fileName)
        # 'zero' is forwarded to load_xlsx unchanged; presumably the padding
        # mode for files shorter than max_row_length -- TODO confirm.
        features = load_xlsx(file_path, labelName, max_row_length, 'zero')
        all_features.append(features)
    return pd.concat(all_features, ignore_index=True)
|
||||
|
||||
|
|
@ -109,3 +131,58 @@ def get_max_row_len(folder, filenames):
|
|||
return max_len
|
||||
|
||||
# Public API of this module: a star-import exposes only `load_data`.
__all__ = ['load_data']
|
||||
|
||||
# ---------- 内部工具函数:处理包含零宽字符或不同 Unicode 形式的文件名匹配 ----------
|
||||
|
||||
def _strip_zero_width(s: str) -> str:
    """Return ``s`` without the common zero-width code points.

    Removed characters: U+200B (zero width space), U+200C (zero width
    non-joiner), U+200D (zero width joiner) and U+FEFF (zero width no-break
    space). A value that is not a ``str`` is handed back unchanged.
    """
    if isinstance(s, str):
        # Mapping a code point to None makes str.translate delete it.
        removal_table = dict.fromkeys((0x200B, 0x200C, 0x200D, 0xFEFF))
        return s.translate(removal_table)
    return s
|
||||
|
||||
def _canonicalize_name(name: str) -> str:
    """Return ``name`` normalized to NFKC with zero-width characters removed."""
    # Normalization runs first; the zero-width strip is applied to its result.
    return _strip_zero_width(unicodedata.normalize('NFKC', name))
|
||||
|
||||
def _normalize_for_compare(name: str) -> str:
    """Return a relaxed comparison key for a file name.

    On top of ``_canonicalize_name`` the key:

    - treats underscores as spaces (so ``Crocodile_grain`` and
      ``Crocodile grain`` compare equal),
    - collapses every run of whitespace into a single space and trims both
      ends,
    - removes case distinctions.

    The result is only meaningful when compared with another key produced by
    this function; it is not a usable file name.
    """
    n = _canonicalize_name(name)
    n = n.replace('_', ' ')
    n = ' '.join(n.split())
    # casefold() rather than lower(): it is the caseless-matching primitive
    # and also folds characters lower() leaves distinct (e.g. 'ß' vs 'ss').
    return n.casefold()
|
||||
|
||||
def _find_matching_file(folder: str, expected_name: str):
    """Find the directory entry in ``folder`` that corresponds to ``expected_name``.

    Matching is attempted from strictest to most relaxed, and the first
    strategy that yields a hit wins:

    1. the entry is byte-for-byte equal to ``expected_name``;
    2. the entry equals it after ``_canonicalize_name``;
    3. as (2), but ignoring case;
    4. the entry equals it after ``_normalize_for_compare`` (e.g.
       ``"Crocodile grain"`` vs ``"Crocodile_grain"``).

    Args:
        folder: Directory to search (not recursive).
        expected_name: File name to look for, including its extension.

    Returns:
        The matching entry name exactly as it appears in the directory
        listing, or ``None`` when nothing matches or ``folder`` does not
        exist.
    """
    try:
        # os.listdir returns entries in arbitrary order; sorting makes the
        # chosen entry deterministic when several candidates match.
        entries = sorted(os.listdir(folder))
    except FileNotFoundError:
        return None

    # An exact name always beats an entry that is merely equivalent after
    # normalization.
    if expected_name in entries:
        return expected_name

    # Canonicalize every entry once and reuse the result in both passes.
    expected = _canonicalize_name(expected_name)
    canonical_entries = [(entry, _canonicalize_name(entry)) for entry in entries]

    # Strict strategy: equal after canonicalization.
    for entry, canonical in canonical_entries:
        if canonical == expected:
            return entry

    # Secondary strategy: case-insensitive comparison.
    expected_folded = expected.casefold()
    for entry, canonical in canonical_entries:
        if canonical.casefold() == expected_folded:
            return entry

    # Relaxed strategy: underscores count as spaces and whitespace runs are
    # collapsed.
    expected_relaxed = _normalize_for_compare(expected_name)
    for entry in entries:
        if _normalize_for_compare(entry) == expected_relaxed:
            return entry

    return None
|
||||
|
|
|
|||
|
|
@ -0,0 +1,180 @@
|
|||
name: Deeplearning
|
||||
channels:
|
||||
- pytorch
|
||||
- nvidia
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- blas=1.0=mkl
|
||||
- bottleneck=1.3.7=py312ha883a20_0
|
||||
- brotli=1.0.9=h5eee18b_8
|
||||
- brotli-bin=1.0.9=h5eee18b_8
|
||||
- brotli-python=1.0.9=py312h6a678d5_8
|
||||
- bzip2=1.0.8=h5eee18b_6
|
||||
- ca-certificates=2024.11.26=h06a4308_0
|
||||
- certifi=2024.12.14=py312h06a4308_0
|
||||
- charset-normalizer=3.3.2=pyhd3eb1b0_0
|
||||
- contourpy=1.2.0=py312hdb19cb5_0
|
||||
- cuda-cudart=12.4.127=0
|
||||
- cuda-cupti=12.4.127=0
|
||||
- cuda-libraries=12.4.0=0
|
||||
- cuda-nvrtc=12.4.127=0
|
||||
- cuda-nvtx=12.4.127=0
|
||||
- cuda-opencl=12.4.127=0
|
||||
- cuda-runtime=12.4.0=0
|
||||
- cudatoolkit=11.5.1=hcf5317a_9
|
||||
- cycler=0.11.0=pyhd3eb1b0_0
|
||||
- cyrus-sasl=2.1.28=h52b45da_1
|
||||
- dbus=1.13.18=hb2f20db_0
|
||||
- debugpy=1.6.7=py312h6a678d5_0
|
||||
- et_xmlfile=1.1.0=py312h06a4308_1
|
||||
- expat=2.6.2=h6a678d5_0
|
||||
- ffmpeg=4.3=hf484d3e_0
|
||||
- filelock=3.13.1=py312h06a4308_0
|
||||
- fontconfig=2.14.1=h55d465d_3
|
||||
- fonttools=4.51.0=py312h5eee18b_0
|
||||
- freetype=2.12.1=h4a9f257_0
|
||||
- glib=2.78.4=h6a678d5_0
|
||||
- glib-tools=2.78.4=h6a678d5_0
|
||||
- gmp=6.2.1=h295c915_3
|
||||
- gnutls=3.6.15=he1e5248_0
|
||||
- gst-plugins-base=1.14.1=h6a678d5_1
|
||||
- gstreamer=1.14.1=h5eee18b_1
|
||||
- icu=73.1=h6a678d5_0
|
||||
- idna=3.7=py312h06a4308_0
|
||||
- intel-openmp=2023.1.0=hdb19cb5_46306
|
||||
- jinja2=3.1.4=py312h06a4308_0
|
||||
- joblib=1.4.2=py312h06a4308_0
|
||||
- jpeg=9e=h5eee18b_3
|
||||
- kiwisolver=1.4.4=py312h6a678d5_0
|
||||
- krb5=1.20.1=h143b758_1
|
||||
- lame=3.100=h7b6447c_0
|
||||
- lcms2=2.12=h3be6417_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- lerc=3.0=h295c915_0
|
||||
- libbrotlicommon=1.0.9=h5eee18b_8
|
||||
- libbrotlidec=1.0.9=h5eee18b_8
|
||||
- libbrotlienc=1.0.9=h5eee18b_8
|
||||
- libclang=14.0.6=default_hc6dbbc7_1
|
||||
- libclang13=14.0.6=default_he11475f_1
|
||||
- libcublas=12.4.2.65=0
|
||||
- libcufft=11.2.0.44=0
|
||||
- libcufile=1.9.1.3=0
|
||||
- libcups=2.4.2=h2d74bed_1
|
||||
- libcurand=10.3.5.147=0
|
||||
- libcusolver=11.6.0.99=0
|
||||
- libcusparse=12.3.0.142=0
|
||||
- libdeflate=1.17=h5eee18b_1
|
||||
- libedit=3.1.20230828=h5eee18b_0
|
||||
- libffi=3.4.4=h6a678d5_1
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgfortran-ng=11.2.0=h00389a5_1
|
||||
- libgfortran5=11.2.0=h1234567_1
|
||||
- libglib=2.78.4=hdc74915_0
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libiconv=1.16=h5eee18b_3
|
||||
- libidn2=2.3.4=h5eee18b_0
|
||||
- libjpeg-turbo=2.0.0=h9bf148f_0
|
||||
- libllvm14=14.0.6=hdb19cb5_3
|
||||
- libnpp=12.2.5.2=0
|
||||
- libnvfatbin=12.4.127=0
|
||||
- libnvjitlink=12.4.99=0
|
||||
- libnvjpeg=12.3.1.89=0
|
||||
- libpng=1.6.39=h5eee18b_0
|
||||
- libpq=12.17=hdbd6064_0
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libtasn1=4.19.0=h5eee18b_0
|
||||
- libtiff=4.5.1=h6a678d5_0
|
||||
- libunistring=0.9.10=h27cfd23_0
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- libwebp-base=1.3.2=h5eee18b_0
|
||||
- libxcb=1.15=h7f8727e_0
|
||||
- libxkbcommon=1.0.1=h097e994_2
|
||||
- libxml2=2.13.1=hfdd30dd_2
|
||||
- llvm-openmp=14.0.6=h9e868ea_0
|
||||
- lz4-c=1.9.4=h6a678d5_1
|
||||
- markupsafe=2.1.3=py312h5eee18b_0
|
||||
- matplotlib=3.8.4=py312h06a4308_0
|
||||
- matplotlib-base=3.8.4=py312h526ad5a_0
|
||||
- mkl=2023.1.0=h213fc3f_46344
|
||||
- mkl-service=2.4.0=py312h5eee18b_1
|
||||
- mkl_fft=1.3.8=py312h5eee18b_0
|
||||
- mkl_random=1.2.4=py312hdb19cb5_0
|
||||
- mpmath=1.3.0=py312h06a4308_0
|
||||
- mysql=5.7.24=h721c034_2
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- nettle=3.7.3=hbbd107a_1
|
||||
- networkx=3.3=py312h06a4308_0
|
||||
- numexpr=2.8.7=py312hf827012_0
|
||||
- numpy=1.26.4=py312hc5e2394_0
|
||||
- numpy-base=1.26.4=py312h0da6c21_0
|
||||
- openh264=2.1.1=h4ff587b_0
|
||||
- openjpeg=2.5.2=he7f1fd0_0
|
||||
- openpyxl=3.1.5=py312h5eee18b_0
|
||||
- openssl=3.0.15=h5eee18b_0
|
||||
- packaging=24.1=py312h06a4308_0
|
||||
- pandas=2.2.2=py312h526ad5a_0
|
||||
- pcre2=10.42=hebb0a14_1
|
||||
- pillow=10.4.0=py312h5eee18b_0
|
||||
- pip=24.2=py312h06a4308_0
|
||||
- ply=3.11=py312h06a4308_1
|
||||
- pybind11-abi=5=hd3eb1b0_0
|
||||
- pyopengl=3.1.1a1=py312h06a4308_0
|
||||
- pyparsing=3.0.9=py312h06a4308_0
|
||||
- pyqt=5.15.10=py312h6a678d5_0
|
||||
- pyqt5-sip=12.13.0=py312h5eee18b_0
|
||||
- pysocks=1.7.1=py312h06a4308_0
|
||||
- python=3.12.4=h5148396_1
|
||||
- python-dateutil=2.9.0post0=py312h06a4308_2
|
||||
- python-tzdata=2023.3=pyhd3eb1b0_0
|
||||
- pytorch=2.4.0=py3.12_cuda12.4_cudnn9.1.0_0
|
||||
- pytorch-cuda=12.4=hc786d27_6
|
||||
- pytorch-mutex=1.0=cuda
|
||||
- pytz=2024.1=py312h06a4308_0
|
||||
- pyyaml=6.0.1=py312h5eee18b_0
|
||||
- qt-main=5.15.2=h53bd1ea_10
|
||||
- readline=8.2=h5eee18b_0
|
||||
- requests=2.32.3=py312h06a4308_0
|
||||
- scikit-learn=1.5.1=py312h526ad5a_0
|
||||
- scipy=1.13.1=py312hc5e2394_0
|
||||
- seaborn=0.13.2=py312h06a4308_0
|
||||
- setuptools=72.1.0=py312h06a4308_0
|
||||
- sip=6.7.12=py312h6a678d5_0
|
||||
- six=1.16.0=pyhd3eb1b0_1
|
||||
- sqlite=3.45.3=h5eee18b_0
|
||||
- sympy=1.12=py312h06a4308_0
|
||||
- tbb=2021.8.0=hdb19cb5_0
|
||||
- threadpoolctl=3.5.0=py312he106c6f_0
|
||||
- tk=8.6.14=h39e8969_0
|
||||
- torchaudio=2.4.0=py312_cu124
|
||||
- torchtriton=3.0.0=py312
|
||||
- torchvision=0.19.0=py312_cu124
|
||||
- tornado=6.4.1=py312h5eee18b_0
|
||||
- tqdm=4.66.5=py312he106c6f_0
|
||||
- typing_extensions=4.11.0=py312h06a4308_0
|
||||
- tzdata=2024a=h04d1e81_0
|
||||
- unicodedata2=15.1.0=py312h5eee18b_0
|
||||
- urllib3=2.2.2=py312h06a4308_0
|
||||
- wheel=0.43.0=py312h06a4308_0
|
||||
- xlrd=2.0.1=pyhd3eb1b0_1
|
||||
- xz=5.4.6=h5eee18b_1
|
||||
- yaml=0.2.5=h7b6447c_0
|
||||
- zlib=1.2.13=h5eee18b_1
|
||||
- zstd=1.5.5=hc292b87_2
|
||||
- pip:
|
||||
- autopep8==2.3.1
|
||||
- basedpyright==1.16.0
|
||||
- black==24.8.0
|
||||
- click==8.1.7
|
||||
- fsspec==2024.6.1
|
||||
- graphviz==0.20.3
|
||||
- greenlet==3.0.3
|
||||
- msgpack==1.0.8
|
||||
- mypy-extensions==1.0.0
|
||||
- nodejs-wheel-binaries==20.16.0
|
||||
- pathspec==0.12.1
|
||||
- platformdirs==4.2.2
|
||||
- pycodestyle==2.12.1
|
||||
- pynvim==0.5.0
|
||||
prefix: /home/qyhhh/.miniconda3/envs/Deeplearning
|
||||
11
main.py
11
main.py
|
|
@ -5,12 +5,11 @@ from Qfunctions.saveToxlsx import save_to_xlsx as save_to_xlsx
|
|||
|
||||
def main():
|
||||
# 输入元数据文件夹名称
|
||||
projet_name = '20251214 WZSX'
|
||||
projet_name = '20260109WZSX'
|
||||
# 请在[]内输入每一个分类的名称
|
||||
label_names = ['canvas', 'lambswool',
|
||||
'lychee_grain', 'non-woven_fabric', 'nylon',
|
||||
'PDMS', 'PET', 'PTFE', 'pure_cotton', 'ramie',
|
||||
'silk_cotton', 'suede'
|
||||
label_names = [
|
||||
'Crocodile grain', 'Litchi grain','Pin grain',
|
||||
'Mohair tweed', 'Polar fleece', 'Berber fleece'
|
||||
]
|
||||
print(label_names)
|
||||
data = load_data(projet_name, label_names, isDir=False, fileClass='xlsx')
|
||||
|
|
@ -20,7 +19,7 @@ def main():
|
|||
|
||||
model = Qmlp(
|
||||
X_train=X_train, X_test=X_test, y_train=y_train, y_test= y_test,
|
||||
hidden_layers = [1024, 512, 256],
|
||||
hidden_layers = [256, 256, 256],
|
||||
dropout_rate=0
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue