Source code for watex.base

# -*- coding: utf-8 -*-
#   License: BSD-3-Clause
#   Author: LKouadio <etanoyau@gmail.com>

from __future__ import annotations 
import re 
import sys 
import inspect 
import itertools
import numpy as np
from collections import defaultdict
from warnings import warn

from ._watexlog import  watexlog
from ._docstring import ( 
    DocstringComponents,
    _core_docs
    )
from ._typing import ( 
    List, 
    Optional, 
    DataFrame , 
    Tuple 
    )
from .exlib.sklearn import ( 
    clone, 
    LabelEncoder, 
    _name_estimators , 
    BaseEstimator, 
    ClassifierMixin, 
    accuracy_score, 
    recall_score, 
    precision_score, 
    roc_auc_score,
    train_test_split
    )  
from .exceptions import NotFittedError
from .utils.coreutils import _is_readable 
from .utils.funcutils import (
    _assert_all_types,  
    repr_callable_obj, 
    smart_strobj_recognition, 
    smart_format ,
    sanitize_frame_cols
    )
from .utils._dependency import import_optional_dependency
from .utils.validator import ( 
    array_to_frame, 
    check_array, 
    check_X_y, 
    get_estimator_name
    )

# Public API of this module: the names exported by ``from watex.base import *``.
# NOTE(review): several of these names are defined further down the file,
# outside the part reviewed here -- confirm each one still exists.
__all__=[
    "Data", 
    "Missing", 
    "AdalineGradientDescent", 
    "AdalineStochasticGradientDescent",
    "SequentialBackwardSelection",
    "MajorityVoteClassifier", 
    "GreedyPerceptron", 
    "existfeatures", 
    "selectfeatures" , 
    "get_params" 
    ]

# +++ add base documentations +++
_base_params = dict ( 
    axis="""
axis: {0 or 'index', 1 or 'columns'}, default 0
    Determine if rows or columns which contain missing values are 
    removed.
    * 0, or 'index' : Drop rows which contain missing values.
    * 1, or 'columns' : Drop columns which contain missing value.
    Changed in version 1.0.0: Pass tuple or list to drop on multiple 
    axes. Only a single axis is allowed.    
    """, 
    columns="""
columns: str or list of str 
    columns to replace which contain the missing data. Can use the axis 
    equals to '1'.
    """, 
    name="""
name: str, :attr:`pandas.Series.name`
    A singluar column name. If :class:`pandas.Series` is given, 'name'  
    denotes the attribute of the :class:`pandas.Series`. Preferably `name`
    must correspond to the label name of the target. 
    """, 
    sample="""
sample: int, Optional, 
    Number of row to visualize or the limit of the number of sample to be 
    able to see the patterns. This is usefull when data is composed of 
    many rows. Skrunked the data to keep some sample for visualization is 
    recommended.  ``None`` plot all the samples ( or examples) in the data     
    """, 
    kind="""
kind: str, Optional 
    type of visualization. Can be ``dendrogramm``, ``mbar`` or ``bar``. 
    ``corr`` plot  for dendrogram , :mod:`msno` bar,  :mod:`plt`
    and :mod:`msno` correlation  visualization respectively: 
        * ``bar`` plot counts the  nonmissing data  using pandas
        *  ``mbar`` use the :mod:`msno` package to count the number 
            of nonmissing data. 
        * dendrogram`` show the clusterings of where the data is missing. 
            leaves that are the same level predict one onother presence 
            (empty of filled). The vertical arms are used to indicate how  
            different cluster are. short arms mean that branch are 
            similar. 
        * ``corr` creates a heat map showing if there are correlations 
            where the data is missing. In this case, it does look like 
            the locations where missing data are corollated.
        * ``None`` is the default vizualisation. It is useful for viewing 
            contiguous area of the missing data which would indicate that 
            the missing data is  not random. The :code:`matrix` function 
            includes a sparkline along the right side. Patterns here would 
            also indicate non-random missing data. It is recommended to limit 
            the number of sample to be able to see the patterns. 
    Any other value will raise an error. 
    """, 
    inplace="""
inplace: bool, default False
    Whether to modify the DataFrame rather than creating a new one.    
    """
 )

# Group the shared core parameter docs (``_core_docs["params"]``) and the
# module-specific ones (``_base_params``) under the ``core`` and ``base``
# namespaces; ``Data.__doc__`` refers to them as ``{params.core.<name>}`` and
# ``{params.base.<name>}`` in its ``str.format`` template.
_param_docs = DocstringComponents.from_nested_components(
    core=_core_docs["params"],
    base = DocstringComponents(_base_params)
    )
# +++ end base documentations +++

# Logger obtained from the package logging helper, keyed on this module's name.
_logger = watexlog().get_watex_logger(__name__)

class _Base:
    """Base class for all classes in watex for parameters retrievals

    Notes
    -----
    All class defined should specify all the parameters that can be set
    at the class level in their ``__init__`` as explicit keyword
    arguments (no ``*args`` or ``**kwargs``).
    """

    @classmethod
    def _get_param_names(cls):
        """Get parameter names for the estimator"""
        # fetch the constructor or the original constructor before
        # deprecation wrapping if any
        init = getattr(cls.__init__, "deprecated_original", cls.__init__)
        if init is object.__init__:
            # No explicit constructor to introspect
            return []

        # introspect the constructor arguments to find the model parameters
        # to represent
        init_signature = inspect.signature(init)
        # Consider the constructor parameters excluding 'self'
        parameters = [
            p
            for p in init_signature.parameters.values()
            if p.name != "self" and p.kind != p.VAR_KEYWORD
        ]
        for p in parameters:
            if p.kind == p.VAR_POSITIONAL:
                raise RuntimeError(
                    "watex classes should always "
                    "specify their parameters in the signature"
                    " of their __init__ (no varargs)."
                    " %s with constructor %s doesn't "
                    " follow this convention." % (cls, init_signature)
                )
        # Extract and sort argument names excluding 'self'
        return sorted([p.name for p in parameters])

    def get_params(self, deep=True):
        """
        Get parameters for this estimator.

        Parameters
        ----------
        deep : bool, default=True
            If True, will return the parameters for this class and
            contained subobjects.

        Returns
        -------
        params : dict
            Parameter names mapped to their values.
        """
        out = dict()
        for key in self._get_param_names():
            value = getattr(self, key)
            if deep and hasattr(value, "get_params"):
                deep_items = value.get_params().items()
                out.update((key + "__" + k, val) for k, val in deep_items)
            out[key] = value
        return out

    def set_params(self, **params):
        """Set the parameters of this estimator.

        The method works on simple classes as well as on nested objects
        (such as :class:`~sklearn.pipeline.Pipeline`). The latter have
        parameters of the form ``<component>__<parameter>`` so that it's
        possible to update each component of a nested object.

        Parameters
        ----------
        **params : dict
            Estimator parameters.

        Returns
        -------
        self : estimator instance
            Estimator instance.
        """
        if not params:
            # Simple optimization to gain speed (inspect is slow)
            return self
        valid_params = self.get_params(deep=True)

        nested_params = defaultdict(dict)  # grouped by prefix
        for key, value in params.items():
            key, delim, sub_key = key.partition("__")
            if key not in valid_params:
                local_valid_params = self._get_param_names()
                raise ValueError(
                    f"Invalid parameter {key!r} for estimator {self}. "
                    f"Valid parameters are: {local_valid_params!r}."
                )

            if delim:
                nested_params[key][sub_key] = value
            else:
                setattr(self, key, value)
                valid_params[key] = value

        for key, sub_params in nested_params.items():
            valid_params[key].set_params(**sub_params)

        return self
    

[docs]
class Data: 
    def __init__ (self, verbose: int =0): 
        """Create an unfitted container.

        Parameters
        ----------
        verbose: int, default 0
            Verbosity level, stored as given on the instance.
        """
        # ``data_`` stays ``None`` until a dataset is assigned; `inspect`
        # relies on that to detect an unfitted object.
        self.data_ = None
        self.verbose = verbose
        # one logger per concrete class name
        self._logging = watexlog().get_watex_logger(self.__class__.__name__)
        
    @property 
    def data (self ):
        """ return verified data """
        return self.data_ 
    @data.setter 
    def data (self, d):
        """ Read and parse the data"""
        self.data_ = _is_readable (d) 
        
    @property 
    def describe (self): 
        """ Get summary stats  as well as see the cound of non-null data.
        Here is the default behaviour of the method i.e. it is to only report  
        on numeric columns. To have have full control, do it manually by 
        yourself. 
        
        """
        return self.data.describe() 
    

[docs]
    def fit(self, data: str | DataFrame=None):
        """Read, validate and store the data, then expose its columns.

        Parameters
        ------------
        data: str or :class:`pandas.DataFrame`, optional
            Data to load. When ``None``, the data already stored on the
            instance is validated instead.

        Returns
        ---------
        :class:`Data` instance
            Returns ``self`` for easy method chaining.

        Notes
        -----
        After validation, each column of the sanitized frame is set as an
        attribute of the instance under its sanitized column name.
        """
        if data is not None:
            self.data = data

        # validation only: the return value of ``check_array`` is not used
        validation_options = dict(
            force_all_finite='allow-nan',
            dtype=object,
            input_name='Data',
            to_frame=True,
        )
        check_array(self.data, **validation_options)

        # for consistency, make sure a frame is stored from here on
        self.data = array_to_frame(
            self.data, to_frame=True, input_name='col_', force=True)

        frame = sanitize_frame_cols(self.data, fill_pattern='_')
        for column_name in frame.columns:
            setattr(self, column_name, frame[column_name])

        return self

    

[docs]
    def shrunk (self, 
                columns: list[str], 
                data: str | DataFrame = None, 
                **kwd 
                ):
        """ Reduce the data to the given features.

        Parameters 
        ------------
        columns: str or list of str 
            Columns or features to keep in the datasets

        data: Dataframe or shape (M, N) from :class:`pandas.DataFrame` 
            Dataframe containing samples M  and features N. When given, it 
            replaces the data stored on the instance before the selection; 
            when ``None`` the already stored data is used.

        kwd: dict, 
            additional keywords arguments passed to `selectfeatures`.

        Returns 
        ---------
        :class:`Data` instance
            Returns ``self`` for easy method chaining.

        Raises
        ------
        NotFittedError
            If no data is stored and none is passed.
        """ 
        # Honour the documented `data` argument (it used to be ignored), the
        # same way `rename` does.
        if data is not None: 
            self.data = data 

        self.inspect 

        self.data = selectfeatures(
            self.data , features = columns, **kwd)

        return self 

    
    @property 
    def inspect(self): 
        """ Inspect data and trigger plot after checking the data entry. 
        Raises `NotFittedError` if `ExPlot` is not fitted yet."""
        
        msg = ( "{dobj.__class__.__name__} instance is not fitted yet."
               " Call 'fit' with appropriate arguments before using"
               " this method"
               )
        
        if self.data_ is None: 
            raise NotFittedError(msg.format(
                dobj=self)
            )
        return 1 
    

[docs]
    def profilingReport (self, data: str | DataFrame= None, **kwd):
        """Generate a profiling report of the data.

        It will summarize the types of the columns and allow you to view 
        details of quantile statistics, a histogram, common values and 
        extreme values. 

        Parameters 
        ------------
        data: Dataframe or shape (M, N) from :class:`pandas.DataFrame` 
            Dataframe containing samples M  and features N. When ``None``, 
            the data already stored on the instance is used.

        kwd: dict
            Additional keyword arguments passed to 
            ``pandas_profiling.ProfileReport``.

        Returns 
        ---------
        ``pandas_profiling.ProfileReport``
            The report built from the stored data.

        Raises
        ------
        NotFittedError
            If no data is stored and none is passed.
        ImportError
            If ``pandas_profiling`` cannot be imported.

        Examples 
        ---------
        >>> from watex.base import Data 
        >>> Data().fit(data).profilingReport()

        """
        extra_msg =("'Data.profilingReport' method uses 'pandas-profiling'"
                    " as a dependency.")
        import_optional_dependency("pandas_profiling", extra=extra_msg ) 

        # Store the new data *before* the fitted check, and test against
        # ``None`` explicitly: ``data or self.data`` raises on a DataFrame
        # because its truth value is ambiguous.
        if data is not None: 
            self.data = data 

        self.inspect

        try : 
            from pandas_profiling import ProfileReport
        except ImportError as err:

            msg=(f"Missing of 'pandas_profiling package. {extra_msg}"
                  " Cannot plot profiling report. Install it using pip"
                  " or conda.")
            warn(msg)
            raise ImportError (msg) from err

        return ProfileReport( self.data , **kwd)

    

[docs]
    def rename (self, 
                data: str | DataFrame= None, 
                columns: List[str]=None, 
                pattern:Optional[str] = None
                ): 
        """ 
        rename columns of the dataframe with columns in lowercase and spaces 
        replaced by underscores. 
        
        Parameters 
        -----------
        data: Dataframe of shape (M, N) from :class:`pandas.DataFrame` 
            Dataframe containing samples M  and features N
        
        columns: str or list of str, Optional 
            the  specific columns in dataframe to renames. However all columns 
            is put in lowercase. If columns not in dataframe, error raises.  
            
        pattern: str, Optional, 
            Regular expression pattern to strip the data. By default, the 
            pattern is ``'[ -@*#&+/]'``.
        
        Return
        -------
        ``self``: :class:`~watex.base.Data` instance 
            returns ``self`` for easy method chaining.
        
        """
        pattern = str (pattern)
        
        if pattern =='None': 
            pattern =  r'[ -@*#&+/]'
        regex =re.compile (pattern, flags=re.IGNORECASE)
        
        if data is not None: 
            self.data = data 
            
        self.data.columns= self.data.columns.str.strip() 
        if columns is not None: 
            existfeatures(self.data, columns, 'raise')
            
        if columns is not None: 
            self.data[columns].columns = self.data[columns].columns.str.lower(
                ).map(lambda o: regex.sub('_', o))
        if columns is None: 
            self.data.columns = self.data.columns.str.lower().map(
                lambda o: regex.sub('_', o))
        
        return self 

    
    #XXX TODO # use logical and to quick merge two frames 

[docs]
    def merge (self) : 
        """ Merge two series regardless of their dtype (not implemented).

        This method is currently a stub: its body holds nothing but this 
        docstring, so calling it does nothing and returns ``None``. 

        Intended behaviour, per the notes left around this method: quickly 
        merge two frames or series through the logical-and operator (the 
        commented-out ``__and__ = __rand__ = merge`` alias below). When a 
        series of dtype object holds non-numeric values, the dtypes should 
        be converted to object. 
        """

        # try : 
        #     self.data []
        
    # __and__= __rand__ = merge 
    

[docs]
    def drop ( 
            self, 
            labels: list[str |int] = None, 
            columns: List[str]=None,  
            inplace:bool = False, 
            axis:int = 0 , **kws 
            ): 
        """ Drop specified labels from rows or columns.

        Remove rows or columns by specifying label names and corresponding 
        axis, or by specifying directly index or column names. When using a 
        multi-index, labels on different levels can be removed by specifying 
        the level.
        
        Parameters 
        -----------
        labels: single label or list-like
            Index or column labels to drop. A tuple will be used as a single 
            label and not treated as a list-like.
            
        axis: {0 or 'index', 1 or 'columns'}, default 0
            Whether to drop labels from the index (0 or 'index') 
            or columns (1 or 'columns').
            
        columns: single label or list-like
            Alternative to specifying axis 
            (labels, axis=1 is equivalent to columns=labels)
        kws: dict, 
            Additionnal keywords arguments passed to :meth:`pd.DataFrame.drop`.
            
        Returns 
        ----------
        DataFrame or None
            DataFrame without the removed index or column labels or 
            None if `inplace` equsls to ``True``.

        """
        self.inspect 
  
        data = self.data.drop(labels= labels,  inplace = inplace, 
                       columns = columns , axis =axis , **kws )
        return data 

    
    
        
    def __repr__(self):
        """Return the representation built by ``repr_callable_obj``, called 
        with ``skip='y'``."""
        representation = repr_callable_obj(self, skip='y')
        return representation
       
    def __getattr__(self, name):
        """Fallback for failed attribute lookups.

        Raises `NotFittedError` for the fitted attributes ``data_`` and 
        ``X_`` when they are not set yet. Any other name raises 
        `AttributeError`, with a "Do you mean ...?" hint when 
        ``smart_strobj_recognition`` finds a match among the instance 
        attributes.
        """
        is_unset_fitted_attribute = (
            name.endswith('_')
            and name not in self.__dict__
            and name in ('data_', 'X_')
        )
        if is_unset_fitted_attribute:
            raise NotFittedError (
                f'Fit the {self.__class__.__name__!r} object first'
                )

        rv = smart_strobj_recognition(name, self.__dict__, deep =True)
        if rv is None:
            hint = ""
        else:
            hint = f'. Do you mean {rv!r}' + "?"

        raise AttributeError (
            f'{self.__class__.__name__!r} object has no attribute {name!r}'
            + hint
            ) 

        
# The class docstring is assigned after the class body so that the shared
# parameter descriptions can be interpolated with ``str.format``: the
# ``{params...}`` fields resolve against ``_param_docs`` and ``{returns.self}``
# against ``_core_docs["returns"]``.
Data.__doc__="""\
Data base class

Typically, we train a model with a matrix of data. Note that pandas Dataframe 
is the most used because it is very nice to have columns lables even though 
Numpy arrays work as well. 

For supervised Learning for instance, suc as regression or clasification, our 
intent is to have a function that transforms features into a label. If we 
were to write this as an algebra formula, it would be look like:
    
.. math::
    
    y = f(X)

:code:`X` is a matrix. Each row represent a `sample` of data or information 
about individual. Every columns in :code:`X` is a `feature`.The output of 
our function, :code:`y`, is a vector that contains labels (for classification)
or values (for regression). 

In Python, by convention, we use the variable name :code:`X` to hold the 
sample data even though the capitalization of variable is a violation of  
standard naming convention (see PEP8). 

Parameters 
-----------
{params.core.data}
{params.base.columns}
{params.base.axis}
{params.base.sample}
{params.base.kind}
{params.base.inplace}
{params.core.verbose}

Returns
-------
{returns.self}
   
Examples
--------
.. include:: ../docs/data.rst

""".format(
    params=_param_docs,
    returns=_core_docs["returns"],
)
 

[docs]
class Missing (Data) : 
    """ Deal with missing values in Data 
    
    Most algorithms will not work with missing data. Notable exceptions are the 
    recent boosting libraries such as XGBoost 
    (:doc:`watex.documentation.xgboost.__doc__`), CatBoost and LightGBM. 
    As with many things in machine learning, there are no hard answers for how 
    to treat missing data. Also, missing data could represent different 
    situations. There are four common ways to handle missing data: 
        
        * Remove any row with missing data 
        * Remove any column with missing data 
        * Impute missing values 
        * Create an indicator column to flag that data was missing 
    
    Parameters
    ----------- 
    in_percent: bool, 
        give the statistic of missing data in percentage if ser to ``True``. 
        
    sample: int, Optional, 
        Number of row to visualize or the limit of the number of sample to be 
        able to see the patterns. This is usefull when data is composed of 
        many rows. Skrunked the data to keep some sample for visualization is 
        recommended.  ``None`` plot all the samples ( or examples) in the data 
    kind: str, Optional 
        type of visualization. Can be ``dendrogramm``, ``mbar`` or ``bar``. 
        ``corr`` plot  for dendrogram , :mod:`msno` bar,  :mod:`plt`
        and :mod:`msno` correlation  visualization respectively: 
            
            * ``bar`` plot counts the  nonmissing data  using pandas
            *  ``mbar`` use the :mod:`msno` package to count the number 
                of nonmissing data. 
            * dendrogram`` show the clusterings of where the data is missing. 
                leaves that are the same level predict one onother presence 
                (empty of filled). The vertical arms are used to indicate how  
                different cluster are. short arms mean that branch are 
                similar. 
            * ``corr` creates a heat map showing if there are correlations 
                where the data is missing. In this case, it does look like 
                the locations where missing data are corollated.
            * ``None`` is the default vizualisation. It is useful for viewing 
                contiguous area of the missing data which would indicate that 
                the missing data is  not random. The :code:`matrix` function 
                includes a sparkline along the right side. Patterns here would 
                also indicate non-random missing data. It is recommended to limit 
                the number of sample to be able to see the patterns. 
   
        Any other value will raise an error 
    
    Examples 
    --------
    >>> from watex.base import Missing
    >>> data ='data/geodata/main.bagciv.data.csv' 
    >>> ms= Missing().fit(data) 
    >>> ms.plot_.fig_size = (12, 4 ) 
    >>> ms.plot () 
    
    """
    def __init__(self,
                   in_percent = False, 
                   sample = None, 
                   kind = None, 
                   drop_columns: List[str]=None,
                   **kws): 
        """Store the options; remaining keywords go to the parent constructor.

        Parameters
        ----------
        in_percent: bool, default False
            Read by `isnull` to scale the missing ratios by 100.
        sample: int, optional
            Passed to the plotting call in `plot`.
        kind: str, optional
            Passed to the plotting call in `plot`.
        drop_columns: list of str, optional
            Columns that `drop` removes when it is called without 
            ``columns``.
        kws: dict
            Keyword arguments forwarded to ``super().__init__``.
        """
        super().__init__(**kws)

        self.in_percent = in_percent
        self.sample = sample
        self.kind = kind
        self.drop_columns = drop_columns
        # refreshed every time the `isnull` property is read
        self.isnull_ = None
        
    @property 
    def isnull(self):
        """Mean of the null mask of each column of the stored data.

        The result is multiplied by 100 when ``in_percent`` is true. It is 
        also cached on the instance as ``isnull_``.
        """
        ratio = self.data.isnull().mean()
        if self.in_percent:
            ratio = ratio * 1e2

        self.isnull_ = ratio
        return self.isnull_



[docs]
    def plot(self, figsize:Tuple [int] = None,  **kwd ):
        """
        Visualize patterns in the missing data.

        The stored data is handed to ``ExPlot``, whose ``plotmissing`` 
        method is called with the ``kind`` and ``sample`` attributes set on 
        this instance at construction.

        Parameters 
        ------------
        figsize: tuple of int, optional
            Figure size, passed to ``ExPlot`` as ``fig_size``.

        kwd: dict 
            Additional keyword arguments forwarded to 
            ``ExPlot.plotmissing``. 

        Return
        -------
        ``self``: :class:`~watex.base.Missing` instance 
            returns ``self`` for easy method chaining.

        Raises
        ------
        NotFittedError
            If no data is stored on the instance.

        Examples 
        --------
        >>> from watex.base import Missing
        >>> data ='data/geodata/main.bagciv.data.csv' 
        >>> ms= Missing().fit(data) 
        >>> ms.plot(figsize = (12, 4 ) ) 

        """
        self.inspect 
        # imported here rather than at module level
        from .view.plot import ExPlot

        plotter = ExPlot(fig_size=figsize).fit(self.data)
        plotter.plotmissing(kind=self.kind, sample=self.sample, **kwd)

        return self 


    @property 
    def get_missing_columns(self): 
        """List the columns of the stored data that hold at least one NaN."""
        has_nan = self.data.isna().any()
        return list(self.data.columns[has_nan])
    


[docs]
    def drop (self, 
              data : str | DataFrame =None,  
              columns: List[str] = None, 
              inplace = False, 
              axis = 1 , 
              **kwd
              ): 
        """Remove missing data, or remove the given columns.

        Two modes, depending on whether columns to drop are known (passed 
        here, or set as ``drop_columns`` on the instance):

        * no columns: rows or columns holding missing values are removed 
          with ``dropna`` along `axis`;
        * columns given: exactly these columns are removed with ``drop``.

        Parameters 
        -----------
        data: Dataframe of shape (M, N) from :class:`pandas.DataFrame` 
            Dataframe containing samples M  and features N. When ``None``, 
            the data already stored on the instance is used.

        columns: str or list of str 
            Columns to drop. When given, the value is also remembered as 
            ``drop_columns`` on the instance.

        inplace: bool, default False
            Whether to modify the stored DataFrame rather than replacing it 
            with a new one. In both cases the result ends up in 
            ``self.data``.

        axis: {0 or 'index', 1 or 'columns'}, default 1
            Only used when no columns are given. 

            * 0, or 'index' : Drop rows which contain missing values.
            * 1, or 'columns' : Drop columns which contain missing value.

        kwd: dict
            Additional keyword arguments passed to ``dropna`` (e.g. ``how``, 
            ``thresh``, ``subset``) or to ``drop``, depending on the mode.

        Returns 
        -------
        ``self``: :class:`~watex.base.Missing` instance 
            returns ``self`` for easy method chaining.

        Raises
        ------
        NotFittedError
            If no data is stored and none is passed.
        """
        if data is not None: 
            self.data = data 

        self.inspect 
        if columns is not None: 
            self.drop_columns = columns 

        if self.drop_columns is None: 
            if inplace : 
                self.data.dropna (axis = axis , inplace = True, **kwd )
            else :  
                self.data = self.data.dropna (
                    axis = axis , inplace = False, **kwd )
            return self 

        # validate only when there are columns to validate
        existfeatures(self.data , self.drop_columns, error ='raise')

        # `columns=` already targets the column axis, so `axis` is not needed
        if inplace : 
            self.data.drop (columns = self.drop_columns , 
                            inplace = True, **kwd)
        else : 
            # The previous code read the nonexistent ``self.columns`` here
            # and threw the result away.
            self.data = self.data.drop (columns = self.drop_columns , 
                                        inplace = False , **kwd)

        return self 

    
    @property 
    def sanity_check (self): 
        """Tell whether any cell of the stored data is still missing.

        Returns a single boolean: true if at least one cell is NaN.
        """
        missing_per_column = self.data.isna().any()
        return missing_per_column.any()
    

[docs]
    def replace (self, 
                 data:str |DataFrame = None , 
                 columns: List[str] = None,
                 fill_value: float = None , 
                 new_column_name: str= None, 
                 return_non_null: bool = False, 
                 **kwd): 
        """ 
        Replace the missing values. 

        Two independent operations are available:

        * ``return_non_null=True``: use the :code:`coalesce` function of 
          :mod:`pyjanitor` to build `new_column_name` from the first non 
          null value of each row among `columns`, and return the resulting 
          dataframe. 
        * ``fill_value`` given: fill the missing values of the stored data 
          with this value. 

        Parameters 
        -----------
        data: Dataframe of shape (M, N) from :class:`pandas.DataFrame` 
            Dataframe containing samples M  and features N. When ``None``, 
            the data already stored on the instance is used.

        columns: str or list of str 
            Columns to consider; they must exist in the data. Defaults to 
            all columns. Note that the plain fill applies to the whole 
            dataframe; `columns` restricts only the coalesce operation and 
            the pyjanitor fallback of the fill.

        fill_value: float, optional
            Value used to fill the missing values.

        new_column_name: str, optional
            Name of the coalesced column; required when `return_non_null` 
            is ``True``.

        return_non_null: bool, default False
            Whether to return the coalesced dataframe instead of ``self``.

        kwd: dict
            Additional keyword arguments passed to ``fillna``.

        Returns 
        -------
        ``self``: :class:`~watex.base.Missing` instance 
            returns ``self`` for easy method chaining, or the coalesced 
            dataframe when `return_non_null` is ``True``.

        Raises
        ------
        NotFittedError
            If no data is stored and none is passed.
        ModuleNotFoundError
            If `return_non_null` is ``True`` and pyjanitor is not installed.
        """

        if data is not None: 
            self.data = data 

        self.inspect 

        if isinstance(columns, str): 
            columns = [columns]
        if columns is not None: 
            # nothing to validate when no columns are given
            existfeatures(self.data , columns )
        target_columns = (
            list(self.data.columns) if columns is None else list(columns)
        )

        if return_non_null : 
            new_column_name = _assert_all_types(new_column_name, str  )

            # pyjanitor is imported as ``janitor``. Looking for 'pyjanitor'
            # in ``sys.modules`` could never succeed, and only told whether
            # a module was already imported, not whether it is installed.
            try : 
                import janitor as jn 
            except ImportError as err: 
                raise ModuleNotFoundError(" 'pyjanitor' is missing.Install it"
                                          " mannualy using conda or pip."
                                          ) from err
            return jn.coalesce (self.data , 
                                *target_columns, 
                                target_column_name = new_column_name, 
                                )

        if fill_value is not None: 
            # fill missing values with a particular values. 
            try : 
                self.data = self.data .fillna(fill_value , **kwd)
            except (TypeError, ValueError, NotImplementedError) as err: 
                # Best effort, as before: fall back on pyjanitor when it is
                # available, otherwise leave the data untouched -- but say
                # so instead of failing silently.
                try : 
                    import janitor as jn 
                except ImportError: 
                    warn(f"Could not fill the missing values with "
                         f"{fill_value!r}: {err}")
                else : 
                    self.data = jn.fill_empty ( 
                        self.data , column_names = target_columns, 
                        value = fill_value 
                        )

        return self 


    

[docs]
class SequentialBackwardSelection (_Base ):
    r"""
    Sequential Backward Selection (SBS) feature selector. 
    
    SBS is a feature selection algorithm which aims to reduce the 
    dimensionality of the initial feature subspace with a minimum decay in 
    the performance of the classifier, in order to improve computational 
    efficiency. In certain cases, SBS can even improve the predictive power 
    of the model if the model suffers from overfitting. 
    
    The idea behind SBS is simple: it sequentially removes features 
    from the full feature subset until the new feature subspace contains the 
    desired number of features. In order to determine which feature is to be 
    removed at each stage, a criterion function :math:`J` is needed [1]_. 
    The criterion calculated by the criterion function can simply be 
    the difference in performance of the classifier before and after the 
    removal of a particular feature. The feature to be removed at each 
    stage is then the feature that maximizes this criterion; or in simpler 
    terms, at each stage the feature whose removal causes the least 
    performance loss is eliminated. Based on the preceding definition, the 
    algorithm can be outlined in a few steps:
        
        1. Initialize the algorithm with :math:`k=d`, where :math:`d` is the 
           dimensionality of the full feature space, :math:`X_d`. 
        2. Determine the feature :math:`x^{-}` that maximizes the criterion: 
           :math:`x^{-}= argmax J(X_k-x)`, where :math:`x\in X_k`. 
        3. Remove the feature :math:`x^{-}` from the feature set: 
           :math:`X_{k-1}= X_k -x^{-}; k=k-1`.
        4. Terminate if :math:`k` equals the number of desired features; 
           otherwise go back to step 2. [2]_ 
            
    Parameters 
    -----------
    estimator: object, default=None
        Estimator instance exposing a ``fit`` and a ``predict`` method. It 
        is fitted in place on every candidate feature subset. 
    k_features: int, default=1 
        Number of features at which the backward elimination stops. It is 
        cast to ``int`` and must not exceed the number of features of the 
        training set. 
    scoring: callable or str, default='accuracy'
        Metric used to rank the feature subsets. Available metrics are 
        'precision', 'recall', 'roc_auc' and 'accuracy'. A callable is 
        resolved through its ``__name__`` (with the ``_score`` suffix 
        removed) and must map to one of these names; anything else raises 
        an error. 
    test_size : float or int, default=0.25
        Forwarded to ``train_test_split`` when the data given to ``fit`` is 
        split into the internal training and validation subsets. 
    random_state : int, RandomState instance or None, default=42
        Forwarded to ``train_test_split``; controls the shuffling applied 
        to the data before the split. Pass an int for reproducible output 
        across multiple function calls.

    References 
    -----------
    .. [1] Raschka, S., Mirjalili, V., 2019. Python Machine Learning, 3rd ed. Packt.
    .. [2] Ferri F., Pudil F., Hatef M., and Kittler J., Comparative study of 
        the techniques for Large-scale feature selection, pages 403-413, 1994.
    
    Attributes 
    -----------
    feature_names_in : list 
        Column names of `X` seen during ``fit``. Defined only when the 
        validated `X` exposes a ``columns`` attribute. 
        
    indices_: tuple of int 
        Column indices of the feature subset retained at the end of ``fit``. 
        
    subsets_: list of tuple 
        Successive values of `indices_`, starting with the full feature set 
        and adding one entry per elimination step. 
        
    scores_: list 
        Validation score of the subset kept at each step, aligned with 
        `subsets_`. 
        
    k_score_: float 
        Score of the final subset (last item of `scores_`). 
        
    scorer_name_: str 
        Capitalized name of the resolved scoring metric, set during ``fit``. 
        
    Examples
    --------
    >>> from watex.exlib.sklearn import KNeighborsClassifier , train_test_split
    >>> from watex.datasets import fetch_data
    >>> from watex.base import SequentialBackwardSelection
    >>> X, y = fetch_data('bagoue analysed') # data already standardized
    >>> Xtrain, Xt, ytrain,  yt = train_test_split(X, y)
    >>> knn = KNeighborsClassifier(n_neighbors=5)
    >>> sbs= SequentialBackwardSelection (knn)
    >>> sbs.fit(Xtrain, ytrain )

    """
    # Maps the accepted ``scoring`` names to the metric functions.
    _scorers = dict (accuracy = accuracy_score , recall = recall_score , 
                   precision = precision_score, roc_auc= roc_auc_score 
                   )
    def __init__ (self, estimator=None , k_features=1 , 
                  scoring ='accuracy', test_size = .25 , 
                  random_state = 42 ): 
        # Parameters are stored untouched; validation happens in ``fit``.
        self.estimator=estimator 
        self.k_features=k_features 
        self.scoring=scoring 
        self.test_size=test_size
        self.random_state=random_state 
        

[docs]
    def fit(self, X, y) :
        """ Fit the selector on the training data. 
        
        The data passed here is split internally into a training subset and 
        a validation subset (see ``test_size`` and ``random_state``). Every 
        candidate feature subset is fitted on the former and scored on the 
        latter, so that an external test set never has to take part in the 
        selection. Starting from all the features, the subset with one 
        feature less that scores best is kept at each step until only 
        ``k_features`` features remain. 
        
        Parameters 
        ----------
        X:  Ndarray ( M x N matrix where ``M=m-samples``, & ``N=n-features``)
            Training set; a DataFrame is accepted as well, in which case 
            its column names are recorded before the values are extracted. 
        y: array-like, shape (M, ) ``M=m-samples``, 
            Training target. 
        
        Returns 
        --------
        self: `SequentialBackwardSelection` instance 
            returns ``self`` for easy method chaining.
        
        """
        X, y = check_X_y(
            X, 
            y, 
            estimator = get_estimator_name(self ), 
            to_frame= True, 
            )
        
        self._check_sbs_args(X)
        
        if hasattr(X, 'columns'): 
            # Expose the names under the documented ``feature_names_in_`` 
            # attribute. ``feature_names_in`` (no trailing underscore) is the 
            # name this method used to set and is kept as an alias.
            self.feature_names_in_ = list(X.columns )
            self.feature_names_in = self.feature_names_in_
            X = X.values 
            
        Xtr, Xt,  ytr, yt = train_test_split(X, y , test_size=self.test_size, 
                                            random_state=self.random_state 
                                            )
        dim = Xtr.shape [1] 
        # Baseline: score of the model trained on the full feature set.
        self.indices_= tuple (range (dim))
        self.subsets_= [self.indices_]
        self.scores_=[self._compute_score(Xtr, Xt,  ytr, yt, self.indices_)]
        
        while dim > self.k_features: 
            # Every way of dropping exactly one of the remaining features.
            subsets = list(
                itertools.combinations(self.indices_, r=dim-1))
            scores = [self._compute_score(Xtr, Xt,  ytr, yt, p) 
                      for p in subsets]
            
            # Keep the subset whose removal hurts the score the least.
            best = int(np.argmax (scores))
            self.indices_= subsets [best]
            self.subsets_.append(self.indices_)
            self.scores_.append (scores[best])
            dim -=1 
            
        # score obtained with the final (k_features-sized) subset 
        self.k_score_= self.scores_[-1]
        
        return self 

        

[docs]
    def transform (self, X): 
        """ Transform the training set 
        
        Parameters 
        ----------
        X:  Ndarray ( M x N matrix where ``M=m-samples``, & ``N=n-features``)
            Training set; Denotes data that is observed at training and 
            prediction time, used as independent variables in learning. 
            When a matrix, each sample may be represented by a feature vector, 
            or a vector of precomputed (dis)similarity with each training 
            sample. :code:`X` may also not be a matrix, and may require a 
            feature extractor or a pairwise metric to turn it into one  before 
            learning a model.
        Returns 
        -------
        X:  Ndarray ( M x N matrix where ``M=m-samples``, & ``N=n-features``)
            New transformed training set with selected features columns 
        
        """
        if not hasattr (self, 'indices_'): 
            raise NotFittedError(
                "Can't call transform with estimator not fitted yet."
                " Fit estimator by calling the 'fit' method with appropriate"
                " arguments.")
        return X[:, self.indices_]

    
    def _compute_score (self, Xtr, Xt,  ytr, yt, indices):
        """ Compute score from splitting `X` and indices """
        self.estimator.fit(Xtr[:, indices], ytr)
        y_pred = self.estimator.predict (Xt [:, indices])
        score = self.scoring (yt, y_pred)
        
        return score 

    def _check_sbs_args (self, X): 
        """ Assert SBS main arguments  """
        
        if not hasattr(self.estimator, 'fit'): 
            raise TypeError ("Estimator must have a 'fit' method.")
        try : 
            self.k_features = int (self.k_features)
        except  Exception as err: 
            raise TypeError ("Expect an integer for number of feature k,"
                             f" got {type(self.k_features).__name__!r}"
                             ) from err
        if self.k_features > X.shape [1] :
            raise ValueError ("Too many number of features."
                              f" Expect max-features={X.shape[1]}")
        if  ( 
            callable(self.scoring) 
            or inspect.isfunction ( self.scoring )
            ): 
            self.scoring = self.scoring.__name__.replace ('_score', '')
        
        if self.scoring not in self._scorers.keys(): 
            raise ValueError (
                f"Accept only scorers {list (self._scorers.keys())}"
                f"for scoring, not {self.scoring!r}")
            
        self.scoring = self._scorers[self.scoring] 
        
        self.scorer_name_ = self.scoring.__name__.replace (
            '_score', '').title ()
        
    def __repr__(self): 
        """ Represent the  Sequential Backward Selection class """
        get_params = self.get_params()  
        get_params.pop('scoring')
        if hasattr (self, 'scorer_name_'): 
            get_params ['scoring'] =self.scorer_name_ 
        
        tup = tuple (f"{key}={val}".replace ("'", '') for key, val in 
                     get_params.items() )
        
        return self.__class__.__name__ + str(tup).replace("'", "") 

    

[docs]
class GreedyPerceptron (_Base): 
    r""" Perceptron classifier 
    
    Inspired by Rosenblatt's concept of the perceptron rule. Rosenblatt 
    published the first concept of the perceptron learning rule based on the 
    MCP (McCulloch-Pitts) neuron model [1]_, [2]_. With the perceptron rule, 
    Rosenblatt proposed an algorithm that would automatically learn the 
    optimal weight coefficients that would then be multiplied by the input 
    features in order to make the decision of whether a neuron fires 
    (transmits a signal) or not. In the context of supervised learning and 
    classification, such an algorithm could then be used to predict whether 
    a new data point belongs to one class or the other. 
    
    Rosenblatt's initial perceptron rule and the perceptron algorithm can be 
    summarized by the following steps: 
        - initialize the weights at 0 or small random numbers. 
        - For each training example, :math:`x^{(i)}`:
            - Compute the output value :math:`\hat{y}`. 
            - update the weights. 
    The update of each weight :math:`w_j` of the weight vector :math:`w` 
    can be formally written as:
        
    .. math:: 
        
        w_j := w_j + \Delta w_j
            
    Parameters 
    -----------
    eta: float, default=0.01
        Learning rate (between 0. and 1.) 
    n_iter: int, default=50
        Number of passes (epochs) over the training set. 
    random_state: int, default=42
        Seed of the random number generator used for the weight 
        initialization.
        
    Attributes 
    ----------
    w_: ndarray of shape (1 + n_features,)
        Weights after fitting; the first item is the bias unit. 
    errors_: list 
        Number of misclassifications (non-zero updates) in each epoch. 
    
        
    References
    ------------
    .. [1] Rosenblatt F, 1957, The perceptron:A perceiving and Recognizing
        Automaton,Cornell Aeoronautical Laboratory 1957
    .. [2] McCulloch W.S and W. Pitts, 1943. A logical calculus of Idea of 
        Immanent in Nervous Activity, Bulleting of Mathematical Biophysics, 
        5(4): 115-133, 1943.
    
    """
    def __init__(self, eta:float = .01 , n_iter: int = 50 , 
                 random_state:int = 42 ) :
        super().__init__()
        # Hyper-parameters are stored as given; they are only used in ``fit``.
        self.eta=eta 
        self.n_iter=n_iter 
        self.random_state=random_state 
        

[docs]
    def fit(self , X, y ): 
        """ Fit the training data 
        
        The weights are drawn from a normal distribution (standard deviation 
        0.01) seeded with ``random_state`` and then updated example by 
        example with the perceptron rule during ``n_iter`` epochs. 
        
        Parameters 
        ----------
        X:  Ndarray ( M x N matrix where ``M=m-samples``, & ``N=n-features``)
            Training set. An object exposing a ``columns`` attribute (e.g. 
            a DataFrame) is converted to its underlying values. 
        y: array-like, shape (M, ) ``M=m-samples``, 
            Training target; ``predict`` outputs 1 or -1, so the targets are 
            expected to use the same encoding. 
        
        Returns 
        --------
        self: `Perceptron` instance 
            returns ``self`` for easy method chaining.
        """
        X, y = check_X_y(
            X, 
            y, 
            estimator = get_estimator_name(self ), 
            to_frame= True, 
            )
        # NOTE(review): with ``to_frame=True`` the validated X presumably 
        # comes back as a DataFrame when one is passed in. Iterating a frame 
        # yields its column labels, not its rows, so the per-example loop 
        # below needs the raw values.
        if hasattr(X, 'columns'): 
            X = X.values 
        
        rgen = np.random.RandomState(self.random_state)
        
        # one weight per feature plus the bias unit stored at index 0 
        self.w_ = rgen.normal(loc=0. , scale =.01 , size = 1 + X.shape[1]
                              )
        self.errors_ =list() 
        for _ in range (self.n_iter):
            errors =0 
            for xi, target in zip(X, y):
                # the update is zero when the example is correctly classified 
                update = self.eta * (target - self.predict(xi))
                self.w_[1:] += update * xi 
                self.w_[0] += update 
                errors  += int(update !=0.) 
            self.errors_.append(errors)
        
        return self 

    

[docs]
    def net_input(self, X) :
        """ Compute the net input """
        return np.dot (X, self.w_[1:]) + self.w_[0] 



[docs]
    def predict (self, X): 
        """
       Predict the  class label after unit step
        
        Parameters
        ----------
        X : Ndarray ( M x N matrix where ``M=m-samples``, & ``N=n-features``)
            Training set; Denotes data that is observed at training and 
            prediction time, used as independent variables in learning. 
            When a matrix, each sample may be represented by a feature vector, 
            or a vector of precomputed (dis)similarity with each training 
            sample. :code:`X` may also not be a matrix, and may require a 
            feature extractor or a pairwise metric to turn it into one  before 
            learning a model.

        Returns
        -------
        ypred: predicted class label after the unit step  (1, or -1)

        """      
        if not hasattr (self, 'w_'): 
            raise NotFittedError("Can't call 'predict' method with estimator"
                                 " not fitted yet. Fit estimator by calling"
                                 " the 'fit' method first.")
        return np.where (self.net_input(X) >=.0 , 1 , -1 )

    
    def __repr__(self): 
        """ Represent the output class """
        
        tup = tuple (f"{key}={val}".replace ("'", '') for key, val in 
                     self.get_params().items() )
        
        return self.__class__.__name__ + str(tup).replace("'", "") 

    


[docs]
class MajorityVoteClassifier (BaseEstimator, ClassifierMixin ): 
    r"""
    A majority vote ensemble classifier 
    
    Combine different classification algorithms, each associated with an 
    individual weight for confidence. The goal is to build a stronger 
    meta-classifier that balances out the weaknesses of the individual 
    classifiers on a particular dataset. In more precise mathematical terms, 
    the weighted majority vote can be expressed as follows: 
        
    .. math:: 
        
        \hat{y} = \arg\max_{i} \sum_{j=1}^{m} w_j \chi_A (C_j(x)=i)
    
    where :math:`w_j` is a weight associated with a base classifier, :math:`C_j`; 
    :math:`\hat{y}` is the predicted class label of the ensemble. :math:`A` is 
    the set of the unique class labels; :math:`\chi_A` is the characteristic 
    function or indicator function which returns 1 if the predicted class of 
    the jth classifier matches :math:`i` (:math:`C_j(x)=i`). For equal 
    weights, the equation is simplified as follows: 
        
    .. math:: 
        
        \hat{y} = mode \{C_1(x), C_2(x), ... , C_m(x)\}
            
    Parameters 
    ------------
    
    clfs: {array_like}, shape (n_classifiers)
        The different classifiers of the ensemble. A single classifier is 
        wrapped into a list when the model is fitted. 
        
    weights:{array-like}, shape (n_classifiers, ), Optional, default=None 
        If a list of `int` or `float` values is provided, the classifiers 
        are weighted by importance; uniform weights are used if 'weights' is
        ``None``.
        
    vote: str , ['classlabel', 'probability'], default is 'classlabel'
        If 'classlabel' the prediction is based on the argmax of the class 
        label. Otherwise, if 'probability', the argmax of the sum of the 
        probabilities is used to predict the class label. Note it is 
        recommended for calibrated classifiers. When the model is fitted, 
        the value is normalized to 'label' (it contains 'class', 'label' or 
        'target') or to 'proba' (it contains 'proba'). 
        
    Attributes 
    ------------
    classes_: array_like, shape (n_classes, ) 
        Unique class labels found in `y` by the label encoder. 
    
    classifiers_: list, 
        list of fitted classifiers (clones of `clfs`) 
        
    classifier_names_: dict 
        Classifiers keyed by their generated names; empty until the 
        classifiers have been checked at fit time. 
        
    Examples 
    ---------
    >>> import numpy as np 
    >>> from watex.exlib.sklearn import (
    ...     LogisticRegression,DecisionTreeClassifier ,KNeighborsClassifier, 
    ...     Pipeline , cross_val_score , train_test_split , StandardScaler , 
    ...     SimpleImputer )
    >>> from watex.datasets import fetch_data 
    >>> from watex.base import MajorityVoteClassifier 
    >>> from watex.base import selectfeatures 
    >>> data = fetch_data('bagoue original').get('data=dfy1')
    >>> X0 = data.iloc [:, :-1]; y0 = data ['flow'].values  
    >>> # exclude the categorical value for demonstration 
    >>> # binarize the target y 
    >>> y = np.asarray (list(map (lambda x: 0 if x<=1 else 1, y0))) 
    >>> X = selectfeatures (X0, include ='number')
    >>> X = SimpleImputer().fit_transform (X) 
    >>> X, Xt , y, yt = train_test_split(X, y)
    >>> clf1 = LogisticRegression(penalty ='l2', solver ='lbfgs') 
    >>> clf2= DecisionTreeClassifier(max_depth =1 ) 
    >>> clf3 = KNeighborsClassifier( p =2 , n_neighbors=1) 
    >>> pipe1 = Pipeline ([('sc', StandardScaler()), 
    ...                    ('clf', clf1)])
    >>> pipe3 = Pipeline ([('sc', StandardScaler()), 
    ...                    ('clf', clf3)])
    
    (1) -> Test the results of each classifier taken individually 
    
    >>> clf_labels =['Logit', 'DTC', 'KNN']
    >>> # test the results without using the MajorityVoteClassifier
    >>> for clf , label in zip ([pipe1, clf2, pipe3], clf_labels): 
    ...     scores = cross_val_score(clf, X, y , cv=10 , scoring ='roc_auc')
    ...     print("ROC AUC: %.2f (+/- %.2f) [%s]" %(scores.mean(), 
    ...                                              scores.std(), 
    ...                                              label))
    ROC AUC: 0.91 (+/- 0.05) [Logit]
    ROC AUC: 0.73 (+/- 0.07) [DTC]
    ROC AUC: 0.77 (+/- 0.09) [KNN]
    
    (2) -> Implement the MajorityVoteClassifier
    
    >>> # test the results with Majority vote  
    >>> mv_clf = MajorityVoteClassifier(clfs = [pipe1, clf2, pipe3])
    >>> clf_labels += ['Majority voting']
    >>> all_clfs = [pipe1, clf2, pipe3, mv_clf]
    >>> for clf , label in zip (all_clfs, clf_labels): 
    ...     scores = cross_val_score(clf, X, y , cv=10 , scoring ='roc_auc')
    ...     print("ROC AUC: %.2f (+/- %.2f) [%s]" %(scores.mean(), 
    ...                                              scores.std(), label))
    ROC AUC: 0.91 (+/- 0.05) [Logit]
    ROC AUC: 0.73 (+/- 0.07) [DTC]
    ROC AUC: 0.77 (+/- 0.09) [KNN]
    ROC AUC: 0.92 (+/- 0.06) [Majority voting]
    
    The majority vote gives a good score with a comparable spread. 
    """     
    
    def __init__(self, clfs, weights = None , vote ='classlabel'):
        
        self.clfs=clfs 
        self.weights=weights
        self.vote=vote 
        
        # name -> classifier mapping; populated when the classifiers are 
        # checked at fit time and read by ``get_params``.
        self.classifier_names_={}
  

[docs]
    def fit(self, X, y):
        """
        Fit a clone of every classifier on the encoded target. 
        
        Parameters
        ----------

        X:  Ndarray ( M x N matrix where ``M=m-samples``, & ``N=n-features``)
            Training set. 
        y: array-like, shape (M, ) ``M=m-samples``
            Training target. The labels are encoded as consecutive integers 
            starting at 0 before the classifiers are fitted. 
        
        Returns 
        --------
        self: `MajorityVoteClassifier` instance 
            returns ``self`` for easy method chaining.
        """
        X, y = check_X_y(
            X, 
            y, 
            estimator = get_estimator_name(self ), 
            to_frame= True, 
            )
        
        self._check_clfs_vote_and_weights ()
        
        # use label encoder to ensure that class start by 0 
        # which is important for np.argmax call in predict 
        encoder = LabelEncoder () 
        # encode once instead of once per classifier 
        y_encoded = encoder.fit_transform(y)
        
        fitted_clfs = [clone (clf).fit(X, y_encoded) for clf in self.clfs]
        
        # Publish the fitted state only once every classifier has been 
        # fitted, so that a failure above cannot leave a partially filled 
        # `classifiers_` that would pass the `inspect` check.
        self._labenc = encoder 
        self.classes_ = encoder.classes_ 
        self.classifiers_ = fitted_clfs 
            
        return self 

    
    @property 
    def inspect (self): 
        """ Inspect object whether is fitted or not"""
        msg = ( "{obj.__class__.__name__} instance is not fitted yet."
               " Call 'fit' with appropriate arguments before using"
               " this method"
               )
        
        if not hasattr (self, 'classifiers_'): 
            raise NotFittedError(msg.format(
                obj=self)
            )
        return 1 
    

[docs]
    def predict(self, X):
        """
        Predict the class label of X 
        
        Parameters
        ----------
        
        X:  Ndarray ( M x N matrix where ``M=m-samples``, & ``N=n-features``)
            Training set; Denotes data that is observed at training and 
            prediction time, used as independent variables in learning. 
            When a matrix, each sample may be represented by a feature vector, 
            or a vector of precomputed (dis)similarity with each training 
            sample. :code:`X` may also not be a matrix, and may require a 
            feature extractor or a pairwise metric to turn it into one  before 
            learning a model.
            
        Returns
        -------
        maj_vote:{array_like}, shape (n_examples, )
            Predicted class label array 
        """
        self.inspect 
        
        if self.vote =='proba': 
            maj_vote = np.argmax (self.predict_proba(X), axis =1 )
        if self.vote =='label': 
            # collect results from clf.predict 
            preds = np.asarray(
                [clf.predict(X) for clf in self.classifiers_ ]).T 
            maj_vote = np.apply_along_axis(
                lambda x : np.argmax( 
                    np.bincount(x , weights = self.weights )), 
                    axis = 1 , 
                    arr= preds 
                    
                    )
            maj_vote = self._labenc.inverse_transform(maj_vote )
        
        return maj_vote 

    

[docs]
    def predict_proba (self, X): 
        """
        Predict the class probabilities an return average probabilities which 
        is usefull when computing the the receiver operating characteristic 
        area under the curve (ROC AUC ). 
        
        Parameters
        ----------
        X:  Ndarray ( M x N matrix where ``M=m-samples``, & ``N=n-features``)
            Training set; Denotes data that is observed at training and 
            prediction time, used as independent variables in learning. 
            When a matrix, each sample may be represented by a feature vector, 
            or a vector of precomputed (dis)similarity with each training 
            sample. :code:`X` may also not be a matrix, and may require a 
            feature extractor or a pairwise metric to turn it into one  before 
            learning a model.

        Returns
        -------
        avg_proba: {array_like }, shape (n_examples, n_classes) 
            weights average probabilities for each class per example. 

        """
        self.inspect 
        probas = np.asarray (
            [ clf.predict_proba(X) for clf in self.classifiers_ ])
        avg_proba = np.average (probas , axis = 0 , weights = self.weights ) 
        
        return avg_proba 

    

[docs]
    def get_params( self , deep = True ): 
        """ Overwrite the get params from `_Base` class  and get 
        classifiers parameters from GridSearch . """
        
        if not deep : 
            return super().get_params(deep =False )
        if deep : 
            out = self.classifier_names_.copy() 
            for name, step in self.classifier_names_.items() : 
                for key, value in step.get_params (deep =True).items (): 
                    out['%s__%s'% (name, key)]= value 
        
        return out 

        
    def _check_clfs_vote_and_weights (self): 
        """ assert the existence of classifiers, vote type and the 
         classfifers weigths """
        l = "https://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html"
        if self.clfs is None: 
            raise TypeError( "Expect at least one classifiers. ")

        if hasattr(self.clfs , '__class__') and hasattr(
                self.clfs , '__dict__'): 
            self.clfs =[self.clfs ]
      
        s = set ([ (hasattr(o, '__class__') and hasattr(o, '__dict__')) for o 
                  in self.clfs])
        
        if  not list(s)[0] or len(s)!=1:
            raise TypeError(
                "Classifier should be a class object, not {0!r}. Please refer"
                " to Scikit-Convention to write your own estimator <{1!r}>."
                .format('type(self.clfs).__name__', l)
                )
        self.classifier_names_ = {
            k : v for k, v  in _name_estimators(self.clfs)
            }
        
        regex= re.compile(r'(class|label|target)|(proba)')
        v= regex.search(self.vote)
        if v  is None : 
            raise ValueError ("Vote argument must be 'probability' or "
                              "'classlabel', got %r"%self.vote )
        if v is not None: 
            if v.group (1) is not None:  
                self.vote  ='label'
            elif v.group(2) is not None: 
                self.vote  ='proba'
           
        if self.weights and len(self.weights)!= len(self.clfs): 
           raise ValueError(" Number of classifier must be consistent with "
                            " the weights. got {0} and {1} respectively."
                            .format(len(self.clfs), len(self.weights))
                            )

            
        

[docs]
class AdalineStochasticGradientDescent (_Base) :
    r""" Adaptive Linear Neuron classifier trained with stochastic
    gradient descent.

    Stochastic gradient descent, sometimes also called iterative or online
    gradient descent [1]_, is a popular alternative to batch gradient
    descent. Instead of updating the weights from the sum of the accumulated
    errors over all training examples :math:`x^{(i)}`:

    .. math::

        \Delta w = \eta \sum_{i} (y^{(i)} - \phi(z^{(i)})) x^{(i)}

    the weights are updated incrementally for each training example:

    .. math::

        \Delta w = \eta (y^{(i)} - \phi(z^{(i)})) x^{(i)}

    Parameters
    -----------
    eta: float, default is 0.01
        Learning rate between (0. and 1.)
    n_iter: int, default is 50
        number of iteration passes (epochs) over the training set
    shuffle: bool, default is True
        shuffle training data every epoch if True to prevent cycles.
    random_state: int, default is 42
        random number generator seed for random weight initialization.

    Attributes
    ----------
    w_: Array-like, shape (1 + n_features, )
        Weights after fitting; ``w_[0]`` is the bias unit.
    cost_: list
        Average of the per-sample half squared errors in each epoch.
    w_initialized: bool
        Whether the weights have already been initialized. Used by
        :meth:`partial_fit` to keep the current weights.

    See also
    ---------
    AdalineGradientDescent: :class:`~watex.base.AdalineGradientDescent`

    References
    -----------
    .. [1] Widrow and al., 1960. An Adaptive "Adaline" Neuron Using Chemical
        "Memistors", Technical reports Number, 1553-2, B. Widrow and al.,
        Stanford Electron labs, Stanford, CA, October 1960.

    """
    def __init__(self, eta:float = .01 , n_iter: int = 50 , shuffle=True,
                 random_state:int = 42 ) :
        super().__init__()
        self.eta = eta
        self.n_iter = n_iter
        self.shuffle = shuffle
        self.random_state = random_state

        self.w_initialized = False

    def fit(self , X, y ):
        """ Fit the training data.

        The weights are (re)initialized, then updated one sample at a time
        during ``n_iter`` passes over the data.

        Parameters
        ----------
        X:  Ndarray ( M x N matrix where ``M=m-samples``, & ``N=n-features``)
            Training set.
        y: array-like, shape (M, ) ``M=m-samples``,
            Training target.

        Returns
        --------
        self: `AdalineStochasticGradientDescent` instance
            returns ``self`` for easy method chaining.
        """
        X, y = check_X_y(
            X,
            y,
            estimator = get_estimator_name(self),
            )

        self._init_weights (X.shape[1])
        self.cost_ = list()
        for _ in range(self.n_iter):
            if self.shuffle:
                X, y = self._shuffle (X, y)
            # one weight update per training sample
            cost = [self._update_weights(xi, target)
                    for xi, target in zip(X, y)]
            avg_cost = sum(cost) / len(y)
            self.cost_.append(avg_cost)

        return self

    @property
    def inspect (self):
        """ Inspect object whether is fitted or not.

        Returns 1 when the attribute ``w_`` exists, raises
        ``NotFittedError`` otherwise.
        """
        msg = ( "{obj.__class__.__name__} instance is not fitted yet."
               " Call 'fit' with appropriate arguments before using"
               " this method"
               )

        if not hasattr (self, 'w_'):
            raise NotFittedError(msg.format(
                obj=self)
            )
        return 1

    def partial_fit(self, X, y):
        """
        Fit training data without reinitialising the weights.

        The weights are initialized on the first call only (when
        ``w_initialized`` is False); subsequent calls keep updating the
        current weights, one sample at a time.

        Parameters
        ----------
        X:  Ndarray ( M x N matrix where ``M=m-samples``, & ``N=n-features``)
            Training set. A single sample is given as a one-row matrix.
        y: array-like, shape (M, ) ``M=m-samples``,
            Training target.

        Returns
        --------
        self: `AdalineStochasticGradientDescent` instance
            returns ``self`` for easy method chaining.

        """
        X, y = check_X_y(
            X,
            y,
            estimator = get_estimator_name(self),
            )

        if not self.w_initialized :
            self._init_weights (X.shape[1])

        # Always iterate row by row: `_update_weights` expects one sample
        # (1-D feature vector, scalar target), so a one-row `X` must be
        # unpacked the same way as a multi-row one.
        # NOTE(review): assumes check_X_y returns a 2-D X and a 1-D y.
        for xi, target in zip(X, y):
            self._update_weights (xi, target)

        return self

    def _shuffle (self, X, y):
        """
        Shuffle training data.

        Uses the random generator ``self.rgen`` created by
        ``_init_weights`` to draw one permutation applied to both arrays.

        Parameters
        ----------
        X:  Ndarray ( M x N matrix where ``M=m-samples``, & ``N=n-features``)
            Training set.
        y: array-like, shape (M, ) ``M=m-samples``,
            Training target.

        Returns
        -------
        Training and target data shuffled with the same permutation.

        """
        r = self.rgen.permutation(len(y))
        return X[r], y[r]

    def _init_weights (self, m):
        """
        Initialize weights with small random numbers.

        Parameters
        ----------
        m : int
           number of features; ``1 + m`` weights are drawn (bias included)
           from a normal distribution with mean 0 and standard deviation
           0.01.

        """
        self.rgen = np.random.RandomState(self.random_state)
        self.w_ = self.rgen.normal(loc=.0 , scale=.01, size = 1 + m)
        self.w_initialized = True

    def _update_weights (self, X, y):
        """
        Adaline learning rule to update the weights from a single sample.

        Parameters
        ----------
        X : array-like, shape (N, ) ``N=n-features``
            Feature vector of one training sample.
        y : scalar
            Target of that sample.

        Returns
        -------
        cost: float,
            half of the squared error for the sample.

        """
        output = self.activation (self.net_input(X))
        errors = (y - output )
        self.w_[1:] += self.eta * X.dot(errors)
        # the bias unit must follow the same rule as the other weights
        self.w_[0] += self.eta * errors
        cost = errors **2 /2.

        return cost

    def net_input (self, X):
        """
        Compute the net input of X.

        Parameters
        ----------
        X:  Ndarray ( M x N matrix where ``M=m-samples``, & ``N=n-features``)
            Samples, or a single feature vector of shape (N, ).

        Returns
        -------
        weighted net input: ``X . w_[1:] + w_[0]``

        Raises
        ------
        NotFittedError
            If the weights ``w_`` do not exist yet.

        """
        self.inspect
        return np.dot (X, self.w_[1:]) + self.w_[0]

    def activation (self, X):
        """
        Compute the linear activation.

        Parameters
        ----------
        X:  array-like
            Net input.

        Returns
        -------
        X: the input itself (identity function).

        """
        return X

    def predict (self, X):
        """
        Predict the class label after unit step.

        Parameters
        ----------
        X:  Ndarray ( M x N matrix where ``M=m-samples``, & ``N=n-features``)
            Samples to classify.

        Returns
        -------
        ypred: predicted class label after the unit step  (1, or -1)
        """
        return np.where (self.activation(self.net_input(X))>=0. , 1, -1)

    def __repr__(self):
        """ Represent the output class as ``ClassName(param=value, ...)``
        from the items of ``self.get_params()``. """

        tup = tuple (f"{key}={val}".replace ("'", '') for key, val in
                     self.get_params().items() )

        return self.__class__.__name__ + str(tup).replace("'", "")

    

[docs]
class AdalineGradientDescent (_Base):
    r"""Adaptive Linear Neuron Classifier (batch gradient descent).

    The ADAptive LInear NEuron (Adaline) was published by Bernard Widrow
    and his doctoral student Ted Hoff only a few years after Rosenblatt's
    perceptron algorithm. It can be considered as an improvement of the
    latter (Widrow and al., 1960 [1]_).

    Adaline illustrates the key concepts of defining and minimizing a
    continuous cost function. This lays the groundwork for understanding
    more advanced machine learning algorithms for classification, such as
    Logistic Regression, Support Vector Machines, and Regression models.

    The key difference between the Adaline rule (also known as the
    Widrow-Hoff rule) and Rosenblatt's perceptron is that the weights are
    updated based on a linear activation function rather than a unit step
    function like in the perceptron. In Adaline, this linear activation
    function :math:`\phi(z)` is simply the identity function of the net
    input so that:

        .. math::

            \phi (w^Tx)= w^Tx

    The linear activation is used for learning the weights, while a unit
    step (threshold at 0) is applied to produce the final prediction.

    Parameters
    -----------
    eta: float, default is 0.01
        Learning rate between (0. and 1.)
    n_iter: int, default is 50
        number of iteration passes over the training set
    random_state: int, default is 42
        random number generator seed for random weight initialization.

    Attributes
    ----------
    w_: Array-like, shape (1 + n_features, )
        Weights after fitting; ``w_[0]`` is the bias unit.
    cost_: list
        Sum of squares cost function (divided by 2) in each epoch.

    References
    -----------
    .. [1] Widrow and al., 1960. An Adaptive "Adaline" Neuron Using Chemical
        "Memistors", Technical reports Number, 1553-2, B. Widrow and al.,
        Stanford Electron labs, Stanford, CA, October 1960.

    """
    def __init__(self, eta:float = .01 , n_iter: int = 50 ,
                 random_state:int = 42 ) :
        super().__init__()
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self , X, y ):
        """ Fit the training data.

        Weights are drawn from a normal distribution (mean 0, standard
        deviation 0.01) and then updated ``n_iter`` times using the whole
        training set at each step.

        Parameters
        ----------
        X:  Ndarray ( M x N matrix where ``M=m-samples``, & ``N=n-features``)
            Training set.
        y: array-like, shape (M, ) ``M=m-samples``,
            Training target.

        Returns
        --------
        self: `AdalineGradientDescent` instance
            returns ``self`` for easy method chaining.
        """
        X, y = check_X_y(X, y, estimator=get_estimator_name(self))

        seeded = np.random.RandomState(self.random_state)
        n_weights = 1 + X.shape[1]
        self.w_ = seeded.normal(loc=0., scale=.01, size=n_weights)
        self.cost_ = []

        for _ in range(self.n_iter):
            # residuals are computed once per epoch, before any update
            residuals = y - self.activation(self.net_input(X))
            self.w_[1:] += self.eta * X.T.dot(residuals)
            self.w_[0] += self.eta * residuals.sum()
            self.cost_.append((residuals ** 2).sum() / 2.)

        return self

    @property
    def inspect (self):
        """ Inspect object whether is fitted or not.

        Returns 1 when the attribute ``w_`` exists, raises
        ``NotFittedError`` otherwise.
        """
        if hasattr(self, 'w_'):
            return 1

        template = ("{obj.__class__.__name__} instance is not fitted yet."
                    " Call 'fit' with appropriate arguments before using"
                    " this method"
                    )
        raise NotFittedError(template.format(obj=self))

    def net_input (self, X):
        """
        Compute the net input of X.

        Parameters
        ----------
        X:  Ndarray ( M x N matrix where ``M=m-samples``, & ``N=n-features``)
            Samples.

        Returns
        -------
        weighted net input: ``X . w_[1:] + w_[0]``

        Raises
        ------
        NotFittedError
            If the weights ``w_`` do not exist yet.

        """
        self.inspect  # fitted-state guard, raises when `w_` is missing
        bias, coefs = self.w_[0], self.w_[1:]
        return np.dot(X, coefs) + bias

    def activation (self, X):
        """
        Compute the linear activation.

        Parameters
        ----------
        X:  array-like
            Net input.

        Returns
        -------
        X: the input itself (identity function).

        """
        return X

    def predict (self, X):
        """
        Predict the class label after unit step.

        Parameters
        ----------
        X:  Ndarray ( M x N matrix where ``M=m-samples``, & ``N=n-features``)
            Samples to classify.

        Returns
        -------
        ypred: predicted class label after the unit step  (1, or -1)
        """
        activated = self.activation(self.net_input(X))
        return np.where(activated >= 0., 1, -1)

    def __repr__(self):
        """ Represent the output class as ``ClassName(param=value, ...)``
        from the items of ``self.get_params()``. """
        pairs = tuple(
            f"{name}={value}".replace("'", '')
            for name, value in self.get_params().items()
            )
        return self.__class__.__name__ + str(pairs).replace("'", "")

        

[docs]
def get_params (obj: object 
                ) -> dict: 
    """
    Get object parameters. 
    
    Object can be callable or instances.

    * For a callable (function, class, ...), the parameters of its
      signature are returned with their default values. A parameter without
      default, or whose default is ``...`` (Ellipsis), is mapped to ``None``.
    * For any other object exposing ``__dict__``, the instance attributes
      are returned, except those whose name starts or ends with an
      underscore.
    * For anything else, an empty dict is returned.
    
    :param obj: object , can be callable or instance 
    
    :return: dict of parameters values 
    
    :examples: 
    >>> from sklearn.svm import SVC 
    >>> from watex.base import get_params 
    >>> sigmoid= SVC (
    ...     **{
    ...         'C': 512.0,
    ...         'coef0': 0,
    ...         'degree': 1,
    ...         'gamma': 0.001953125,
    ...         'kernel': 'sigmoid',
    ...         'tol': 1.0 
    ...         }
    ...     )
    >>> pvalues = get_params( sigmoid)
    >>> pvalues  # doctest: +SKIP
    {'decision_function_shape': 'ovr',
     'break_ties': False,
     'kernel': 'sigmoid',
     'degree': 1,
     'gamma': 0.001953125,
     'coef0': 0,
     'tol': 1.0,
     'C': 512.0,
     'nu': 0.0,
     'epsilon': 0.0,
     'shrinking': True,
     'probability': False,
     'cache_size': 200,
     'class_weight': None,
     'verbose': False,
     'max_iter': -1,
     'random_state': None
     }
    """
    if callable(obj): 
        empty = inspect.Parameter.empty
        # Two explicit identity tests: `x is (empty or ...)` only ever
        # compared against `empty` because a class is always truthy.
        return {
            name: None if (param.default is empty or param.default is ...)
            else param.default
            for name, param in inspect.signature(obj).parameters.items()
            }

    if hasattr(obj, '__dict__'): 
        return {k: v for k, v in obj.__dict__.items() 
                if not (k.endswith('_') or k.startswith('_'))}

    # neither callable nor attribute-bearing: nothing to report
    return {}




[docs]
def existfeatures (df, features, error='raise'): 
    """Control whether the features exist or not in the dataframe.
    
    :param df: a dataframe for features selections 
    :param features: str, list, tuple or array of feature names to check.
        A single name given as a string is accepted. Duplicated names are
        checked once. 
    :param error: str - ``raise`` raises an error if at least one feature 
        does not exist in the dataframe. Any value that does not contain 
        ``raise`` is understood as ``ignore``. *default* is ``raise``. 
        
    :return: bool 
        ``True`` if all the features exist in ``df.columns``, ``False`` 
        otherwise (only reachable when `error` is ``ignore``). 
    :raises ValueError: if `error` is ``raise`` and at least one feature 
        is missing. 
    """
    error = 'raise' if error.lower().strip().find('raise') >= 0 else 'ignore'

    if isinstance(features, str): 
        features = [features]
        
    features = _assert_all_types(features, list, tuple, np.ndarray)
    # Drop duplicates (keeping order): comparing against the raw length made
    # a repeated, existing feature look like a missing one.
    requested = list(dict.fromkeys(features))
    columns = set(df.columns)
    missing = [f for f in requested if f not in columns]

    if not missing: 
        return True 

    if error == 'raise': 
        if len(missing) == len(requested): 
            # none of the requested features exists
            nfeat = len(requested)
            msg = f"Feature{'s' if nfeat >1 else ''}"
            raise ValueError (f"{msg} {smart_format(requested)} "
                              f"{'does not' if nfeat <2 else 'dont'}"
                              " exist in the dataframe")
        # pluralize on the number of missing features
        msg = f"Feature{'s' if len(missing) >1 else ''}"
        raise ValueError(f"{msg} {smart_format(missing)} not found in"
                         " the dataframe.")

    return False 

    

[docs]
def selectfeatures (
        df: DataFrame,
        features: List[str] =None, 
        include = None, 
        exclude = None,
        coerce: bool=False,
        **kwd
        ): 
    """ Select features and return a new dataframe.  

    The dataframe is first cast to float where possible (casting errors 
    are ignored), then either the requested `features` are extracted or the 
    columns are filtered by data type.
    
    :param df: a dataframe for features selections 
    :param features: list of features to select. List of features must be 
        in the dataframe otherwise an error occurs. When given, `include`, 
        `exclude` and `coerce` are not used. 
    :param include: the type of data to retrieve in the dataframe `df`. Can  
        be ``number``. 
    :param exclude: type of the data to exclude in the dataframe `df`. Can be 
        ``number`` i.e. only non-digits data will be kept in the data 
        returned.
    :param coerce: return the whole dataframe with the numeric columns 
        transformed. Be aware that no selection is done and no error is 
        raised in that case. *default* is ``False``
    :param kwd: additional keywords arguments from `pd.astype` function 

    :return: the selected data taken from the converted dataframe. 
    
    :ref: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.astype.html
    """
    by_name = features is not None

    # validate the names before doing any conversion work
    if by_name: 
        existfeatures(df, features, error ='raise')

    # cast what can be cast to float; the rest is left as is
    converted = df.astype (float, errors ='ignore', **kwd) 

    if by_name: 
        return converted [features] 

    if coerce: 
        # skip the dtype filter, which raises when both `include` and
        # `exclude` are empty
        return converted 

    return converted.select_dtypes (include=include, exclude=exclude)