import os

import pandas as pd

STATIC_PATH = './Static'


def dLoader(folder, label_names=None):
    """
    Load data from every Excel file in a folder under STATIC_PATH.

    Files are processed in sorted file-name order so that the pairing between
    ``label_names`` and files is deterministic (``os.listdir`` returns entries
    in arbitrary order).

    Args:
        folder (str): Name of the folder (relative to STATIC_PATH) containing
            the ``.xlsx`` files.
        label_names (list): Optional labels, one per file, matched to the
            files in sorted file-name order. If omitted or empty, each file's
            name without its extension is used.

    Returns:
        pandas.DataFrame: One row per feature column of every file, padded to
        a common width, with a trailing ``label`` column.

    Raises:
        ValueError: If the folder contains no ``.xlsx`` files.
        IndexError: If fewer labels than files are supplied.
    """
    folder_path = os.path.join(STATIC_PATH, folder)
    # Skip "~$..." entries: Excel creates these lock files while a workbook is
    # open, and they are not readable workbooks.
    file_names = sorted(
        name for name in os.listdir(folder_path)
        if name.endswith('.xlsx') and not name.startswith('~$')
    )
    if not file_names:
        raise ValueError(f'No .xlsx files found in {folder_path!r}')

    if not label_names:
        # splitext keeps dots that are part of the name ("a.b.xlsx" -> "a.b").
        label_names = [os.path.splitext(name)[0] for name in file_names]
    elif len(label_names) < len(file_names):
        raise IndexError(
            f'Got {len(label_names)} label(s) for {len(file_names)} file(s) '
            f'in {folder_path!r}'
        )

    max_row_length = get_max_row_len(folder_path, file_names)
    all_features = [
        load_xlsx(os.path.join(folder_path, file_name), label, max_row_length)
        for file_name, label in zip(file_names, label_names)
    ]
    return pd.concat(all_features, ignore_index=True)


def load_xlsx(file_name, label_name, max_row_length, fill_rule='mean'):
    """
    Load and process data from a single Excel file.

    Every second column, starting from the second one (positions 1, 3, 5, ...),
    is taken as a feature series. Rows containing a missing value in any of
    those columns are dropped, then each series becomes one output row padded
    to ``max_row_length`` values.

    Args:
        file_name (str): Path to the Excel file.
        label_name (str): Label assigned to every row from this file.
        max_row_length (int): Number of feature values per output row.
        fill_rule (str): Padding rule passed to ``fill_to_len``
            ('min', 'mean', or None).

    Returns:
        pandas.DataFrame: Columns ``feature1`` .. ``feature<max_row_length>``
        plus ``label``; the index holds the original Excel column names.

    Raises:
        ValueError: If a series is longer than ``max_row_length``.
    """
    df = pd.read_excel(file_name)
    # Non-mutating chain: avoids calling dropna(inplace=True) on an iloc slice.
    features = df.iloc[:, 1::2].dropna().reset_index(drop=True)

    feature_names = [f'feature{i + 1}' for i in range(max_row_length)]
    # Build the rows explicitly instead of DataFrame.apply: apply returns an
    # empty frame unchanged, which made the column rename fail whenever dropna
    # removed every row or the sheet had no feature columns.
    padded_rows = [
        fill_to_len(features.iloc[:, pos], max_row_length, fill_rule).to_numpy()
        for pos in range(features.shape[1])
    ]
    result = pd.DataFrame(
        padded_rows, index=features.columns, columns=feature_names
    )
    result['label'] = label_name
    return result


def fill_to_len(row, length=1000, rule=None):
    """
    Pad a row with a constant so that it has at least ``length`` values.

    Rows that already have ``length`` or more values are returned unpadded
    (they are never truncated).

    Args:
        row (pandas.Series): Row to fill.
        length (int): Desired length of the row.
        rule (str): 'min' pads with the row minimum, 'mean' pads with the row
            mean; any other value (including None) pads with 0.

    Returns:
        pandas.Series: New series with a fresh 0..n-1 index and no name.
    """
    missing = length - len(row)
    if missing <= 0:
        # Nothing to pad: skip the statistic and avoid concatenating an empty,
        # dtype-less Series (a deprecated dtype-inference path in pandas).
        unpadded = row.reset_index(drop=True)
        unpadded.name = None
        return unpadded

    if rule == 'min':
        fill_value = row.min()
    elif rule == 'mean':
        fill_value = row.mean()
    else:
        fill_value = 0

    padding = pd.Series([fill_value] * missing)
    return pd.concat([row, padding], ignore_index=True)


def get_max_row_len(folder, filenames):
    """
    Get the maximum number of data rows across Excel files in a folder.

    Each file is fully parsed with ``pandas.read_excel`` to obtain its row
    count.

    Args:
        folder (str): Path to the folder containing Excel files.
        filenames (list): List of Excel file names inside ``folder``.

    Returns:
        int: Largest row count among the files.

    Raises:
        ValueError: If ``filenames`` is empty.
    """
    return max(
        pd.read_excel(os.path.join(folder, filename)).shape[0]
        for filename in filenames
    )