From e44671c2596ee48a31c98773e0acd082f3b57547 Mon Sep 17 00:00:00 2001
From: Aleksey Filippov
Date: Wed, 12 Mar 2025 13:43:32 +0400
Subject: [PATCH] Improve read dataset function

---
Reviewer notes (text between the `---` line and the diffstat is ignored
when the patch is applied):

* Dataset.read now turns a pandas ParserError into a readable message and
  rejects frames with fewer than two columns, which is the usual symptom of
  a wrong 'sep' value.
* Dataset.read now writes the fallback target (the last column) back into
  `params.target`. Dataset.__split no longer computes its own fallback, so
  it assumes `params` has already been through `read`. If `params.target`
  is still unset there, `data.drop([params.target], axis=1)` would most
  likely raise KeyError -- TODO confirm every caller reads before splitting.
* Only ParserError is translated. pandas.errors.EmptyDataError (empty file)
  is not a ParserError subclass and will propagate unchanged -- confirm
  that this is intended.
* `df = None` before the `try` is redundant: the `except` branch always
  raises, so `df` is always bound when the column check runs.
* Follow-up suggestion: raise a project-specific exception with
  `raise ... from err` instead of a bare `Exception`.

 dt-cart/backend/dataset/__init__.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/dt-cart/backend/dataset/__init__.py b/dt-cart/backend/dataset/__init__.py
index 4d40140..244da5f 100644
--- a/dt-cart/backend/dataset/__init__.py
+++ b/dt-cart/backend/dataset/__init__.py
@@ -3,6 +3,7 @@ import uuid
 
 import pandas as pd
 from pandas import DataFrame
+from pandas.errors import ParserError
 from sklearn.model_selection import train_test_split
 from werkzeug import utils
 
@@ -33,10 +34,21 @@ class Dataset:
         return file_name
 
     def read(self, params: DatasetParams) -> DataFrame:
-        df = pd.read_csv(self.__file_name, sep=params.sep, decimal=params.decimal)
+        df = None
+        try:
+            df = pd.read_csv(self.__file_name, sep=params.sep, decimal=params.decimal)
+        except ParserError:
+            raise Exception(
+                "Can't parse dataset. Try to use correct 'sep' and 'decimal' values."
+            )
+        if df.columns.size < 2:
+            raise Exception(
+                "Dataset contains less than 2 columns. "
+                "Try to use correct 'sep' parameter value."
+            )
+        params.target = params.target or df.columns[-1]
         if params.input is not None:
             return df[params.input + [params.target]]
-
         return df
 
     def __split(
@@ -46,9 +58,8 @@ class Dataset:
         random_state: int,
         is_classification: bool = False,
     ) -> SplittedDataset:
-        target = params.target or data.columns[-1]
         X = data.drop([params.target], axis=1)
-        y = data[[target]]
+        y = data[[params.target]]
         stratify = None if not is_classification else y
         X_train, X_test, y_train, y_test = train_test_split(
             X,