Source code for watex.cases.features

# -*- coding: utf-8 -*-
#   Licence:BSD-3-Clause
#   Author: LKouadio <etanoyau@gmail.com>

from __future__ import ( 
    print_function , 
    annotations
    )

import os
import re 
import warnings 
import xml.etree.ElementTree as ET
import pandas as pd 
import numpy as np 

from ..utils.funcutils import  ( 
    savepath_ , 
    sanitize_fdataset, 
    )   
from .._typing import ( 
    List,
    Optional, 
    DataFrame,
    )
from ..utils.hydroutils import ( 
    writef,  
    exportdf, 
    categorize_flow
    ) 
from ..exceptions import ( 
    FileHandlingError, 
    FeatureError, 
   )
from ..utils.gistools import ( 
    ll_to_utm, 
    project_point_ll2utm
    )
from ..utils.coreutils import _is_readable 
from watex._watexlog import watexlog 

__all__=['GeoFeatures', 'FeatureInspection'] 

__docformat__='restructuredtext' 
_logger =watexlog().get_watex_logger(__name__)



[docs]
class GeoFeatures: 
    """
    Features class. Deals with Electrical Resistivity Profiling (ERP), 
    Vertical Electrical Sounding (VES), Geological (Geol) data and 
    Borehole data (Boreh). Sets all the feature values of the different
    investigation sites. The class is composed of: 
    
    - `erp` class obtained from :class:`watex.methods.erp.ERP_collection`
    - `geol` obtained from :class:`watex.geology.geology.Geology` 
    - `boreh` obtained from :class:`watex.geology.geology.Borehole` 
    
    Arguments 
    -----------
    *features_fn* :str , Path_like 
        Path to the geoelectrical features file.
        
    *ErpColObjs*: object 
        Collection object from `erp` survey lines. 
        
    *vesObjs*: object, 
        Collection object from vertical electrical sounding (VES) curves.
        
    *geoObjs*: object, 
        Collection object from `geol` class. See :doc:`watex.geology.geology.Geology`.
        
    *boreholeObjs*: object
        Collection of boreholes of all investigation sites.
        Refer to :doc:`watex.geology.geology.Borehole`


    Holds other optional information passed through ``kwargs``: 
    
    ============  ========================  ===================================
    Attributes              Type                Description  
    ============  ========================  ===================================
    df              pd.core.DataFrame       Container of all features composed 
                                            of :attr:`~Features.featureLabels`
    site_ids        array_like              ID of each survey location.
    site_names      array_like              Survey location names. 
    gFname          str                     Filename of `features_fn`.                                      
    ErpColObjs      obj                     ERP `erp` class object. 
    vesObjs         obj                     VES `ves` class object.
    geoObjs         obj                     Geology `geol` class object.
    borehObjs       obj                     Borehole `boreh` class obj.
    ============  ========================  ===================================   
    
        
    Notes 
    ------
    Be sure not to miss any coordinates file. Indeed, each selected anomaly
    should have a borehole drilled at that place for supervised learning.
    That means each selected anomaly, referenced by its location coordinates
    and its `id` on `erp`, must have its own `ves`, `geol` and `boreh` data.
    For further details about the class objects, please refer to the
    documentation of the aforementioned classes.

    Examples
    ---------
    >>> from watex.cases.features import GeoFeatures 
    >>> data ='data/geodata/main.bagciv.data.csv' 
    >>> featObj =GeoFeatures().fit(data )
    >>> featObj.id_
    Out[114]: 
    array(['e0000001', 'e0000002', 'e0000003', 'e0000004', 'e0000005',
           'e0000006', 'e0000007'], dtype='<U8')
    >>> featObj.site_names_
    >>> featObj.site_names_[:7] 
    Out[115]: array(['b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7'], dtype=object)
     
    """
    
    # Columns kept, in this order, when `fit` rebuilds the feature dataframe.
    featureLabels_ = [
                    'id', 
                    'east',
                    "north",
                    'power',
                    "magnitude",
                    "shape",
                    "type",
                    "sfi",
                    'ohmS',
                    'lwi', 
                    'geol',
                    'flow'
            ]

    def __init__(self, **kws):
        """Create the class logger and store every keyword as an attribute.

        :param kws: arbitrary ``name=value`` pairs; each one is assigned with
            ``setattr`` on the new instance.
        """
        self._logging = watexlog.get_watex_logger(self.__class__.__name__)

        # `setattr` (not `__dict__.update`) so that a key naming a property,
        # e.g. ``data``, still goes through the property setter.
        for name, value in kws.items():
            setattr(self, name, value)
            
         
    @property 
    def data  (self): 
        """ Control the Feature-file extension provide. Usefull to select 
        pd.DataFrame construction."""
        return self.data_
    
    @data.setter 
    def data(self, data ) :
        """ Get the Features file and  seek for pd.Core methods construction."""
    
        self.data_ = _is_readable(data)

 

[docs]
    def fit(self, 
            data: Optional[str |DataFrame]=None , 
            geoObj=None, 
            erpObj=None, 
            vesObj=None, 
            boreholeObj=None, 
            **kws): 
        """
        Read the features and populate the instance attributes.

        Features come from `data` and/or from the four collection objects.
        When the four objects are passed together with `data`, the dataframe
        built from the objects replaces the one read from `data`.

        Parameters
        ----------
        data: str or pd.DataFrame, optional
            Feature file or dataframe, stored through the ``data`` property.
        geoObj, erpObj, vesObj, boreholeObj: object, optional
            Geology, ERP, VES and borehole collections. They are read
            through their ``geoldf``, ``erpdf``, ``vesdf`` and ``borehdf``
            dataframes, each holding an ``id`` column. Each given object is
            stored as ``geoObjs``, ``ErpColObjs``, ``vesObjs`` and
            ``borehObjs`` (the names read back below) and also under the
            former names ``geoObj``, ``erpObj``, ``vesObj``, ``borehObj``.
        kws: dict
            Accepted for signature compatibility; not used.

        Returns
        -------
        self: GeoFeatures
            Instance with ``df_``, ``id``, ``id_`` and ``site_names_`` set,
            plus one attribute per site name holding its dataframe row.

        Raises
        ------
        FeatureError
            If neither `data` nor the four collection objects are available,
            or if the collection objects do not hold the same number of ids.
        """
        if data is not None: 
            self.data = data 

        objsCoun = 0
        for canonical, legacy, obj in (
                ('ErpColObjs', 'erpObj', erpObj),
                ('vesObjs', 'vesObj', vesObj),
                ('geoObjs', 'geoObj', geoObj),
                ('borehObjs', 'borehObj', boreholeObj)):
            if obj is None:
                continue
            # the canonical name is the one the rest of the class reads;
            # the legacy name is kept for code written against it.
            setattr(self, canonical, obj)
            setattr(self, legacy, obj)
            objsCoun += 1

        hasAllObjs = all(
            getattr(self, name, None) is not None
            for name in ('ErpColObjs', 'vesObjs', 'geoObjs', 'borehObjs'))
        # read `data_` directly: it is what the `data` property exposes and
        # it may legitimately be unset at this point.
        hasData = getattr(self, 'data_', None) is not None

        if not hasData and not hasAllObjs:
            raise FeatureError (
                'Features file is not given. Please provide specific'
                ' objects from`erp`, `ves`, `geology` and `borehole` data'
                'Call each specific collection class to build each'
                ' collection features.')

        if hasData:
            self._fit_from_data()

        if objsCoun == 4 or not hasData:
            self._fit_from_objects()
            if not hasData:
                # no feature file: the ERP ids are the site names
                self.site_names_ = np.copy(self.id)

        self.df_.set_index('id', inplace =True)
        # builtin `float`: the `np.float` alias no longer exists in NumPy
        self.df_ = self.df_.astype({
            label: float for label in (
                'east', 'north', 'power', 'magnitude',
                'sfi', 'ohmS', 'lwi', 'flow')})

        # populate site names attributes 
        for attr_ in self.site_names_: 
            if not hasattr(self, attr_): 
                setattr(self, attr_, ID()._findFeaturePerSite_(
                                        _givenATTR=attr_, 
                                        sns=self.site_names_,
                                        df_=self.df_,
                                        id_=self.id, 
                                        id_cache= self.id_))

        return self 

    def _fit_from_data(self):
        """Build ``df_``, ``site_names_``, ``id`` and ``id_`` from ``data``."""
        self.sanitize_fdataset()
        if 'id' not in self.df_.columns:
            # no column recognised as `id`: use the `name` column instead
            self.df_ = self.df_.rename(columns={'name': 'id'})
        self.site_names_ = np.copy(self.df_['id'].to_numpy())

        if self.utm_flag_ == 0:
            # no easting column was recognised: convert lat/lon to UTM
            lats = self.df_['lat'].to_numpy()
            lons = self.df_['lon'].to_numpy()
            self.easting_ = np.zeros(len(lats), dtype=float)
            self.northing_ = np.zeros_like(self.easting_)
            for ii, (lat, lon) in enumerate(zip(lats, lons)):
                try:
                    # NOTE(review): the former call passed the `lon` column
                    # as `lat` and vice versa, unlike the fallback below.
                    self.utm_zone_, utm_easting, utm_northing = ll_to_utm(
                        reference_ellipsoid=23, lat=lat, lon=lon)
                except Exception:
                    utm_easting, utm_northing, self.utm_zone_ = \
                        project_point_ll2utm(lat=lat, lon=lon)
                self.easting_[ii] = utm_easting
                self.northing_[ii] = utm_northing

            self.df_.insert(loc=1, column='east', value=self.easting_)
            self.df_.insert(loc=2, column='north', value=self.northing_)
            self.df_ = self.df_.drop(columns=['lat', 'lon'], errors='ignore')

        # NOTE(review): builtin `id()` returns the address of a temporary
        # string, so these identifiers differ between runs and can repeat;
        # kept as is because the intended identifier is not documented.
        self.df_['id'] = np.array(['e{0}'.format(id(name.lower())) 
                                   for name in self.df_['id']])
        self.id = np.copy(self.df_['id'].to_numpy())
        self.id_ = np.array(['e{0:07}'.format(ii + 1) 
                             for ii in range(len(self.df_['id']))])
        # rebuild the dataframe from main features
        self.df_ = pd.concat({
            featkey: self.df_[featkey] 
            for featkey in self.featureLabels_}, axis=1)

    def _fit_from_objects(self):
        """Build ``df_``, ``id_`` and ``id`` from the collection objects."""
        erpdf = self.ErpColObjs.erpdf
        vesdf = self.vesObjs.vesdf
        geoldf = self.geoObjs.geoldf
        borehdf = self.borehObjs.borehdf

        # same order as the labels in the message below
        temlen = [len(frame['id'])
                  for frame in (erpdf, vesdf, geoldf, borehdf)]
        if len(set(temlen)) != 1:
            raise FeatureError  (
                '`ERP`, `VES`, `Geology` and `Borehole` Features must '
                'have the same length. You  give <{0},{1},{2}, and '
                '{3} respectively.'.format(*temlen))

        self.id_ = self.controlObjId(
            erpObjID=erpdf['id'], 
            boreObjID=borehdf['id'], 
            geolObjID=geoldf['id'], 
            vesObjsID=vesdf['id'])

        # side-by-side join on the index; the class defines no `merge`
        self.df_ = pd.concat(
            [erpdf, vesdf['ohmS'], geoldf['geol'], borehdf[['lwi', 'flow']]],
            axis=1)
        self.id = erpdf['id'].to_numpy()

    

[docs]
    def sanitize_fdataset(self): 
        """ Sanitize the feature dataset. Recognize the columns provided 
        by the users and resset according to the features labels disposals
        :attr:`~.GeoFeatures.featureLabels`."""
        
        self.utm_flag_ =0
        OptsList, paramsList =[['bore', 'for'], 
                                ['x','east'], 
                                ['y', 'north'], 
                                ['pow', 'puiss', 'pa'], 
                                ['magn', 'amp', 'ma'], 
                                ['shape', 'form'], 
                                ['type'], 
                                ['sfi', 'if'], 
                                ['lat'], 
                                ['lon'], 
                                ['lwi', 'wi'], 
                                ['ohms', 'surf'], 
                                ['geol'], 
                                ['flow', 'deb']
                                ], ['id', 
                                   'east', 
                                   'north', 
                                   'power', 
                                   'magnitude', 
                                   'shape', 
                                   'type', 
                                   'sfi', 
                                   'lat', 
                                   'lon', 
                                   'lwi', 
                                   'ohmS', 
                                   'geol', 
                                   'flow'
                                   ]
 
        def getandReplace(optionsList, params, df): 
            """
            Function to  get parames and replace to the main features params.
            
            :param optionsList: 
                User options to qualified the features headlines. 
            :type optionsList: list
            
            :param params: Exhaustive parameters names. 
            :type params: list 
            
            :param df: pd.DataFrame collected from `features_fn`. 
            
            :return: sanitize columns
            :rtype: list 
            
            """
            columns = [c.lower() for c in df.columns] 
            
            for ii, celemnt in enumerate(columns): 
                for listOption, param in zip(optionsList, params): 
                     for option in listOption:
                         if param =='lwi': 
                            if celemnt.find('eau')>=0 : 
                                columns[ii]=param 
                                break
                         if re.match(r'^{0}+'.format(option), celemnt):
                             columns[ii]=param
                             if columns[ii] =='east': 
                                 self.utm_flag_=1
                             break

    
                        
            return columns

        new_df_columns= getandReplace(optionsList=OptsList, params=paramsList,
                                      df= self.data)
        self.df_= pd.DataFrame(data=self.data.to_numpy(), 
                               columns= new_df_columns)

        
 


[docs]
    def from_csv(self, erp_fn):
        """
        Check that `erp_fn` points to an existing file.

        Reading the CSV content is not implemented yet: once the path is
        validated the method returns ``None``.

        :param erp_fn: path_like string of CSV file 
        :type erp_fn: str 

        :raises FileHandlingError: if `erp_fn` is not an existing file.
        :return: None
        
        """
        if os.path.isfile(erp_fn):
            # TODO: retrieve the locations, the coordinates of each pk, the
            # horizontal distance in meters and the apparent resistivity.
            return None

        raise FileHandlingError (
            '{} is not a file. Please provide a right file !'.format(erp_fn))



[docs]
    def from_xml (self, xml_fn, columns=None):
        """
        Collect data from an XML file and build a one-row dataframe.

        The i-th child of the XML root is searched for a sub-element whose
        tag is ``columns[i]``; its text becomes the value of that column.
        A child without that sub-element yields ``None``.

        :param xml_fn: Full path to xml file 
        :type xml_fn: str 
        
        :param columns: list of columns of dataset 
        :type columns: list 

        :return: dataframe with `columns` as header and a single row.
        :rtype: pd.DataFrame
        
        """
        root = ET.parse(xml_fn).getroot()
        # `findtext` returns None for a missing tag instead of failing on
        # the `.text` of a None result.
        values = [node.findtext(columns[ii]) for ii, node in enumerate(root)]

        # `DataFrame.append` no longer exists in pandas 2: build the single
        # row directly and hand it back to the caller.
        return pd.DataFrame([values], columns=columns)

        


[docs]
    def from_json (self, json_fn , indent =4):
        """
        Placeholder for reading feature data from a json file.

        Not implemented yet: both arguments are ignored and ``None`` is
        returned.
        
        :param json_fn: json file 
        :type json_fn: str 

        :param indent: unused for now.
        :type indent: int
        
        """
        return None

    

[docs]
    def data_to_numpy(self, data_fn): 
        """
        Method to get datatype and set different features into nympy array
        
        """
        if data_fn is not None : 
            self.data_fn_ =data_fn 
        
        if not os.path.isfile(self.data_fn_): 
            raise FileHandlingError(
                '{} is not a file. Please provide a '
                'right file !'.format(self.data_fn_))
        ex_file = os.path.splitext(self.data_fn_)[1] 
        if not ex_file in self.dataType.keys(): 
            pass 

    

[docs]
    @staticmethod
    def controlObjId( erpObjID, boreObjID, geolObjID, vesObjsID): 
        """
        Control object id whether the each selected anomaly from `erp` matchs 
        with its`ves` and `geol` and `borehole`.
        
        :param erpObjID: ERP object ID. Refer to 
            :class:`watex.methods.erp.ERP_collection` 
        :type erpObjID: str 
        
        :param boreObjID: Borehole ID.  Refer to 
            :class:`watex.geology.drilling.Borehole`
        :type boreObjID: str 
        
        :param boreObjID: Geology ID.  Refer to 
            :class:`watex.geology.geology.Geology`
        :type boreObjID: str
        
        :param vesObjsID: VES object ID. Refer to 
            :class:`watex.methods.electrical.VerticalSounding`
        
        :return: New survey ID
        
        """
        new_id =np.zeros_like(erpObjID)
        for ii, ( erObj, bhObj, geolObj, vesObj) in enumerate(zip(
                                erpObjID,boreObjID,geolObjID, vesObjsID)): 
            if erObj.replace('e', '') == bhObj.replace(
                    'e', '') and erObj.replace('e', '')== bhObj.replace(
                        'e', '') and erObj.replace(
                            'e', '')== bhObj.replace('e', '') :
                
                new_id [ii] ='e{0:07}'.format(ii+1)
            else: 
                raise FeatureError(
                    "Survey location's name must be the same for `erp`, "
                    ' `ves` and `geol` but you are given '
                    '<{0}, {1}, {2}> respectively. Please rename'
                    ' names to be the same everywhere.'.format(erObj, bhObj,
                                                           geolObj))
        return new_id 

    
    

[docs]
    @writef(reason='write', from_='df')
    def exportdf (self, refout=None, to =None, savepath=None, **kwargs): 
        """ Prepare the feature dataframe for export to a file such as an
        Excel sheet or a '.json' file. The values returned here are handed
        to the `writef` decorator imported by this module; see its
        documentation for what is actually written. 
        
        :param refout: 
            Output filename. If not given will be created referring to the 
            exported date. 
        :param to: 
            Export type. Can be `.xlsx` , `.csv`, `.json` and else
        :type to: str 
        
        :param savepath: 
            Path to save the `refout` filename. If not given
            will be created.
        :param kwargs: 
            Optional ``df`` (dataframe to export instead of the fitted one),
            ``moduleName`` (name used to build the default save path,
            default ``'_geoFeatures_'``) and ``writeindex`` (default False).
        :raises AttributeError: 
            if no dataframe or no site names are available.
        :returns: 
            tuple ``(ndf, to, refout, savepath, writeindex)`` where `ndf` is
            a copy of the dataframe whose ``id`` column holds the site names.
     
        :Example: 
            
            >>> from watex.cases.features import GeoFeatures 
            >>> featObj = GeoFeatures().fit(
            ...    'data/geodata/main.bagciv.data.csv' )
            >>> featObj.exportdf(refout='ybro', to='csv')
    
        """
        df =kwargs.pop('df', None)
        modname =kwargs.pop('moduleName', '_geoFeatures_')
        writeindex =kwargs.pop('writeindex', False)
        
        if df is not None : 
            self.df =df 
            
        for attr in ['to', 'savepath', 'refout']:
            if not hasattr(self, attr): 
                setattr(self, attr,  None)

        if savepath is not None : self.savepath = savepath  
        if to is not None: self.to = to
        if refout is not None: self.refout = refout

        # `fit` stores its results as `df_` and `site_names_`; fall back on
        # them when `df` / `site_names` were not set explicitly.
        frame = getattr(self, 'df', None)
        if frame is None:
            frame = getattr(self, 'df_', None)
        names = getattr(self, 'site_names', None)
        if names is None:
            names = getattr(self, 'site_names_', None)
        if frame is None or names is None:
            raise AttributeError(
                'No dataframe or site names to export. Call `fit` first '
                'or pass the dataframe with the `df` keyword.')

        # create new data and replace id by site name 
        ndf = frame.copy(deep=True)
        ndf.reset_index(inplace =True)
        ndf['id'] = names 
        
        if self.savepath is None :
            self.savepath = savepath_(modname)
            
        return ndf, self.to,  self.refout,\
            self.savepath, writeindex


        

class ID: 
    """
    Special class to manage Feature's ID. Each `erp` or `ves` or `geol` and
    `borehole` name can be an attribute of each collection class. 
    Each survey line is identified with its common `ID` and points to 
    the same name.
    
    :param _givenATTR:  Station or location name considered a new name for 
        attribute creating
    :type _givenATTR: str 
    
    :param sns: Station names from `erp`, `ves` and `geol`. 
    :type sns: array_like or sns
    
    :param id_: 
        Identification site number. See col ``id`` of :attr:`~geofeatures.id_`
        
    :param id_cache: 
        New id of station number kept on caches 
        
    :param df_: Features dataFrame. Refer to :attr:~geofeatures.df 
    
    :Example: 
        
        >>> from watex.cases.features import GeoFeatures
        >>> featObj =GeoFeatures().fit('data/geodata/main.bagciv.data.csv')
        >>> featObj.b126
        
    where ``b126`` is the survey name and `featObj.b126` is the data value 
    extracted from the features dataframe.
    
    :Note: To extract data from station location name `sns`, be sure to write 
        the right name. If not an `AttributeError` occurs. 
    
    """
    
    def __init__(self, **kwargs): 
        """Store the keyword arguments as attributes and, when `_givenATTR`
        is among them, run the lookup straight away (the row is then
        available as ``select_``)."""
        self._logging = watexlog().get_watex_logger(self.__class__.__name__)

        for key, value in kwargs.items(): 
            setattr(self, key, value)
        
        if hasattr(self, '_givenATTR'): 
            self._findFeaturePerSite_(self._givenATTR)
            
    def _findFeaturePerSite_(self, _givenATTR, sns=None, df_=None, id_=None, 
                             id_cache=None ): 
        """Select the dataframe row of the station `_givenATTR`.

        `_givenATTR` is compared, ignoring case, with each station name of
        `sns`, each `id_` and each `id_cache`. When several stations match,
        the last one is kept.
        
        :param _givenATTR: 
            Station or location name considered a new name for attribute 
            creating.
        :param sns, df_, id_, id_cache: 
            See the class parameters. An argument left to ``None`` keeps the
            value already stored on the instance, if any.
                
        :raises AttributeError: if `_givenATTR` matches no station.
        :return: Selected part of the DataFrame (one row).
        :rtype: pd.DataFrame 
        
        """
        for attr, value in zip(['_givenATTR', 'df_', 'sns', 'id_cache', 'id_'], 
                               [_givenATTR, df_, sns, id_cache, id_]): 
            # an explicit argument wins over a value kept from a former call
            if value is not None or not hasattr(self, attr): 
                setattr(self, attr, value)

        target = str(self._givenATTR).lower()
        selected = None
        for name, id_, idp_ in zip(self.sns, self.id_, self.id_cache): 
            candidates = (str(name).lower(), str(id_).lower(),
                          str(idp_).lower())
            if target in candidates: 
                # `id_cache` entries look like 'e0000012': 1-based position
                selected = self.df_.iloc[[int(idp_.replace('e', '')) - 1]]

        if selected is None:
            raise AttributeError(
                'No station matches {0!r}.'.format(self._givenATTR))

        self.select_ = selected
        return self.select_ 

                    

[docs]
class FeatureInspection: 
    """ 
    Summarizes the flow features. 
    
    It deals with data features categorization. When numerical values are 
    provided, a standard `qualitative` or `quantitative` analysis is performed.
    
    Parameters  
    -----------
    *data*: str or pd.core.DataFrame  
        Path-like object or pandas Dataframe. Must contain the main 
        parameters including the `target`. 
        
    **tname**:str 
        The target name for predicting purposes. Here for groundwater
        exploration, we specify the name of the target as ``flow``. 
        
    **flow_classes**: list or array_like 
        The way to classify the flow. Provide the main specific values to convert 
        the categorical trends to numerical values. Different projects have 
        different targeted flow rates. Might specify either for village hydraulic, 
        or improved village hydraulic or urban hydraulics. 
        
    **drop_columns**: list  
        Items for dropping. To analyse the data, we can drop some specific 
        columns to not corrupt data analysis. In a dataframe collected 
        straightforwardly from :class:`~features.GeoFeatures`, `drop_columns`
        usually refers to the coordinate positions: ['east', 'north'].
        
    **mapflow**: bool, 
        If set to True, values in the target column should map to categorical 
        values. Commonly the flow rate values are given as a trend of numerical
        values. For a classification purpose, flow rate must be converted to 
        categorical values which mainly refer to the type of hydraulic
        system. Mostly the type of hydraulic system is in turn tied to the 
        number of people living in a specific area. For instance, 
        flow classes can be ranged as follows: 
    
            - FR = 0 is for dry boreholes
            - 0 < FR ≤ 3m3/h for village hydraulic (≤2000 inhabitants)
            - 3 < FR ≤ 6m3/h  for improved village hydraulic(>2000-20 000inhbts) 
            - 6 <FR ≤ 10m3/h for urban hydraulic (>200 000 inhabitants). 
        
        Note that this flow range is not exhaustive and can be modified according 
        to the type of hydraulic required on the project. 
    
    **set_index**: bool, 
        Consider a column as the dataframe index. If set to ``True``, 
        please provide the `col_name`, otherwise it should be the ``id`` 
        as the default column item. 
        
    **sanitize**: 
        Polish the data and remove inconsistent columns in the data which do 
        not refer to the predicting features. It is able to change for instance
        the French name of water ``eau`` to `water`, which is related to the value 
        of the water inflow feature ``lwi``. This could be useful when the data 
        is given as a Path-Like object and features are not described correctly
        in the case of groundwater. Default is ``False``
        
 
    Examples
    --------
    >>> from watex.cases.features import FeatureInspection
    >>> data = 'data/geodata/main.bagciv.data.csv'
    >>> fobj = FeatureInspection().fit(data) 
    >>> fobj.data_.columns
    Out[117]: 
    Index(['num', 'name', 'east', 'north', 'power', 'magnitude', 'shape', 'type',
           'sfi', 'ohmS', 'lwi', 'geol', 'flow'],
          dtype='object')
    """
    
    def __init__(self,
                 tname:str ='flow' ,
                 mapflow:bool =True, 
                 sanitize:bool =False,
                 flow_classes: List[float] = [0., 1., 3.],
                 set_index:bool = False, 
                 col_name: Optional[str] = None, 
                 **kws): 
        """Store the inspection settings on the instance.

        :param tname: name of the target column.
        :param mapflow: stored as ``mapflow``.
        :param sanitize: stored as ``sanitize``.
        :param flow_classes: flow class boundaries, stored as
            ``flow_classes_`` without going through the property setter.
        :param set_index: stored as ``set_index``.
        :param col_name: stored as ``col_name``.
        :param kws: ``col_id`` (default ``'id'``) is stored as
            ``index_col_id`` and ``drop_columns`` (default ``None``) as
            ``drop_columns``; any other keyword becomes an attribute.
        """
        self._logging =watexlog().get_watex_logger(self.__class__.__name__)
        
        self.tname =tname
        self.set_index =set_index 
        self.sanitize =sanitize
        self.mapflow =mapflow
        # Copy list arguments: the default list is a single object shared by
        # every call, so storing it directly would let one instance alter
        # the classes of all the others.
        self.flow_classes_ = (
            list(flow_classes) if isinstance(flow_classes, list)
            else flow_classes)
        self.col_name= col_name 
        
        self.index_col_id =kws.pop('col_id', 'id')
        self.drop_columns =kws.pop('drop_columns', None)

        self.cache_ =None 

        for key, value in kws.items(): 
            setattr(self, key, value)
            

    @property 
    def flow_classes(self): 
        return self.flow_classes_
    @flow_classes.setter 
    def flow_classes(self, flow_classes): 
        """ When tnameing features is numerically considered, The `setter` 
        try to classified into different classes provided. """
        if flow_classes is None: 
            flow_classes = [0.,  3.,  6., 10.]
            warnings.warn("default flow classes argument range is set to"
                          f"'{flow_classes}' m3/h !")
            self.logging.info (' The flow classes argument is set to default'
                ' values : <{0},{1},{2} and {3} m3/h>.'.format( *list(
                 flow_classes)))
            
        try: 
            self.flow_classes_ = np.array(flow_classes).astype(float)
        except: 
            raise FeatureError (f"Not supported flow classes "
                                f"arguments: {flow_classes}")
            
    @property 
    def data (self): 
        """ Control the Feature-file extension provide. Usefull to select 
        pd.DataFrame construction."""
        return self.data_
    
    @data.setter 
    def data(self, data) :
        """ Get the Features file and  seek for pd.Core methods 
        construction."""
        self.data_ = _is_readable(data )

    
    @property 
    def cache(self): 
        """ Generate cache `df_` for all eliminate features and keep on 
        new pd.core.frame.DataFrame. """
        return self.cache_
        
    @cache.setter 
    def cache(self, cache): 
        """ Holds the remove features and keeps on new dataframe
        
        :param cache: iterable object containing the item to drop"""
        
        try:
            
            temDict={'id': self.data_['id'].to_numpy()}
        except KeyError: 
            # if `id` not in colums try 'name'
            temDict={'id': self.data_['name'].to_numpy()}
            self.index_col_id ='name'
            
        temc=[]
        if isinstance(cache, str): 
            cache = [cache] 

        if self.drop_columns is not None: 
            if isinstance(self.drop_columns, str) :
                self.drop_columns =[self.drop_columns]
            cache = cache + self.drop_columns 
            if isinstance(self.tname, str): 
                cache.append(self.tname)
            else : cache + list(self.tname)
            
        for cc in cache: 
            if cc not in self.data_.columns: 
                temDict[cc]= np.full((self.data_.shape[0],), np.nan)
                temc.append(cc)
            else: 
                if cc=='id': continue # id is already in dict
                temDict [cc]= self.data_[cc].to_numpy()
        
        # check into the dataset whether the not provided features exists.
        if self.data_ is not None : 
            df_= self.data_.copy()
            df_.reset_index(inplace= True)
            
            if 'id' in df_.columns: 
                temDict['id_']= df_['id'].to_numpy()

            if len(temc) !=0 : 
                for ad in temc: 
                    if ad in df_.columns: 
                        temDict[ad]= df_[ad]
         
            
        self.cache_= pd.DataFrame(temDict)
        self.col_name = self.col_name or 'id_'
        if self.col_name in self.cache_.columns: 
            self.cache_.set_index('id_', inplace=True)
       
        

[docs]
    def fit(self, data: str | DataFrame): 
        """
        Fit the dataset and classify the flow into classes. 
        
        By default, four (04) flow classes are considered according to 
        the reference below [1]_. 
        
        The steps applied, in order and depending on the instance 
        settings, are: 
            
        - read `data` through the ``data`` property; 
        - sanitize the dataset when ``sanitize`` is ``True``; 
        - move the ``drop_columns`` to the cache and drop them from the 
          dataset; 
        - map the target ``tname`` to the ``flow_classes`` when 
          ``mapflow`` is ``True``; 
        - set the ``id`` column as index when ``set_index`` is truthy; 
        - when ``tname`` is ``'flow'``, cast the columns ``power``, 
          ``magnitude``, ``sfi``, ``ohmS`` and ``lwi`` that are present 
          to float. 
        
        Parameters 
        -----------
        
        *data*: str or pd.core.DataFrame  
            Path-like object or pandas Dataframe. Must contain the main 
            parameters including the target `tname`. 
            
        Returns
        --------
        object: :class:`~.FeatureInspection` object 
        
        Raises 
        -------
        FeatureError 
            If no data could be read, or if some of the ``drop_columns`` 
            are not columns of the dataset. 
        
        Examples
        ---------
        >>> from watex.cases.features import FeatureInspection
        >>> data = 'data/geodata/main.bagciv.data.csv'
        >>> fobj = FeatureInspection() 
        >>> fobj.fit(data)
        >>> fobj.data.iloc[1:3 , :]
        ...    num name  power  magnitude  ...         ohmS        lwi      geol  flow
        1    2   b2   70.0      142.0  ...  1135.551531  21.406531  GRANITES   FR1
        2    3   b3   80.0       87.0  ...   767.562500   0.000000  GRANITES   FR1
        
        
        Notes 
        --------
        The paper mentions 04 types of hydraulic according to the population 
        demand and the number of living inhabitants. The hydraulic system are
        defined as:
         
         *  FR = 0 is for dry boreholes
         *  0 < FR ≤ 3m3/h for village hydraulic (≤2000 inhabitants)
         *  3 < FR ≤ 6m3/h  for improved village hydraulic(>2000-20 000inhbts) 
         *  6 <FR ≤ 10m3/h for urban hydraulic (>200 000 inhabitants). 
         
        The flow classes can be modified according to the type of hydraulic
        proposed for the project. 
        
        References 
        ------------
            
        .. [1] CIEH. (2001). L’utilisation des méthodes géophysiques pour
            la recherche d’eaux dans les aquifères discontinus. 
            Série Hydrogéologie, 169.
            
        """
        
        self.data = data 
            
        if self.data_ is None: 
            raise FeatureError("NoneType can not be a data of features.")
              
        if self.sanitize is True : 
            # the second returned value (UTM flag) is not used here
            self.data_ , _ = sanitize_fdataset(self.data_)

        if self.drop_columns is not None:
            if isinstance(self.drop_columns, str): 
                self.drop_columns = [self.drop_columns]
           
            # subset test rather than a length comparison: duplicated 
            # names in `drop_columns` must not be reported as missing.
            if not set(self.drop_columns).issubset(self.data_.columns):
                raise  FeatureError (
                    'Drop values are not found on dataFrame columns. '
                    'Please provide the right names for droping.')
                
            # keep the dropped features aside before removing them
            self.cache = self.drop_columns  
            self.data_.drop(columns = self.drop_columns, inplace =True)
            
        if self.mapflow is True : 
            self.data_[self.tname]= categorize_flow(
                target= self.data_[self.tname], 
                flow_values =self.flow_classes)
  
        if self.set_index :
            if self.index_col_id !='id': 
                self.data_=self.data_.rename(
                    columns = {self.index_col_id:'id'})
                self.index_col_id ='id'
                
            try: 
                self.data_.set_index(self.index_col_id, inplace =True)
            except KeyError : 
                # no `id` column: expose `name` as `id`. The column is 
                # only renamed here; it is not set as index.
                self.data_=self.data_.rename(columns = {'name':'id'})
                self.index_col_id ='id'

        if self.tname =='flow': 
            # `np.float` was a plain alias of the builtin `float` and no 
            # longer exists in recent NumPy. Only cast the columns that 
            # are actually present (some may have been dropped above).
            float_columns = ('power', 'magnitude', 'sfi', 'ohmS', 'lwi')
            dtypes = {name: float for name in float_columns 
                      if name in self.data_.columns}
            if dtypes: 
                self.data_ = self.data_.astype(dtypes)
                
        return self 

    
    

[docs]
    def writedf(self, df=None , refout:str =None,  to:str =None, 
              savepath:str =None, modname:str ='_anEX_',
              reset_index:bool =False): 
        """
        Write the analysis `df`. 
        
        Refer to :func:`watex.decorators.exportdf` for more details about 
        the arguments ``refout``, ``to``, ``savepath``, ``modname``
        and ``reset_index``. 
        
        Each option is resolved as follows: a value that differs from the 
        signature default is used and stored on the instance; otherwise 
        the attribute of the same name already set on the instance is 
        used; otherwise the signature default is stored and used. The 
        resolved values are then handed over to ``exportdf``.
        
        :param df: dataframe to write. 
        :param refout: passed to ``exportdf`` as ``refout``. 
        :param to: passed to ``exportdf`` as ``to``. 
        :param savepath: passed to ``exportdf`` as ``savepath``. 
        :param modname: passed to ``exportdf`` as ``modname``. 
        :param reset_index: passed to ``exportdf`` as ``reset_index``. 
        
        :Example: 
            
            >>> from watex.cases.features import FeatureInspection
            >>> slObj =FeatureInspection(
            ...   data_fn='data/geo_fdata/BagoueDataset2.xlsx',
            ...   set_index =True)
            >>> slObj.writedf()
        
        """
        # (attribute name, value received, signature default)
        options = (
            ('df', df, None), 
            ('refout', refout, None), 
            ('to', to, None), 
            ('savepath', savepath, None), 
            ('modname', modname, '_anEX_'), 
            ('reset_index', reset_index, False), 
            )
        for nattr, vattr, default in options: 
            if default is None: 
                # identity test: `df` may be a DataFrame, for which `!=`
                # is element-wise.
                explicit = vattr is not None 
            else: 
                explicit = vattr != default 
            # An explicit argument always wins; previously it was ignored 
            # as soon as the attribute existed (e.g. on a second call).
            if explicit or not hasattr(self, nattr): 
                setattr(self, nattr, vattr)
                
        exportdf(df= self.df , refout=self.refout,
               to=self.to, savepath =self.savepath, 
               reset_index =self.reset_index, modname =self.modname)