from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder


def dsplit(data, labels=None, test_size=0.2, random_state=None):
    """
    Split a DataFrame into standardized features and label-encoded targets.

    The last column of ``data`` is taken as the target and every preceding
    column as a feature. Features are standardized with a scaler fitted on
    the training portion only; targets are converted to integer codes by a
    ``LabelEncoder``.

    Args:
        data (pandas.DataFrame): Input data; accessed positionally through
            ``.iloc`` (last column = target, the rest = features).
        labels (list): Optional collection of label values. When given, the
            encoder is fitted on ``labels`` instead of on the target column,
            and the encoded form of ``labels`` is returned as the last item.
            The target values are then transformed with that same encoder.
        test_size (float): Proportion of the dataset to include in the test
            split (forwarded to ``train_test_split``).
        random_state (int): Random state for reproducibility (forwarded to
            ``train_test_split``).

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test, encoded_labels)`` where
        the ``X_*`` items are the scaled features, the ``y_*`` items are the
        encoded targets, and ``encoded_labels`` is the encoded ``labels``
        argument, or ``None`` when ``labels`` was not supplied.
    """
    label_encoder = LabelEncoder()
    feature_frame = data.iloc[:, :-1]
    target_column = data.iloc[:, -1]

    # The encoder's vocabulary comes from ``labels`` when provided, otherwise
    # from the values present in the target column itself.
    if labels is None:
        label_encoder.fit(target_column)
        encoded_labels = None
    else:
        encoded_labels = label_encoder.fit_transform(labels)

    partitions = train_test_split(
        feature_frame,
        target_column,
        test_size=test_size,
        random_state=random_state,
    )
    train_features, test_features, train_targets, test_targets = partitions

    # Fit the scaler on the training features only, then apply the same
    # fitted scaling to the test features.
    feature_scaler = StandardScaler()
    scaled_train = feature_scaler.fit_transform(train_features)
    scaled_test = feature_scaler.transform(test_features)

    # Flatten the target values to 1-D before handing them to the encoder.
    coded_train = label_encoder.transform(train_targets.values.ravel())
    coded_test = label_encoder.transform(test_targets.values.ravel())

    return scaled_train, scaled_test, coded_train, coded_test, encoded_labels