# Source code for watex.cases.features

# -*- coding: utf-8 -*-
#   Licence:BSD-3-Clause
#   Author: LKouadio <etanoyau@gmail.com>

from __future__ import ( 
    print_function , 
    annotations
    )

import os
import re 
import warnings 
import xml.etree.ElementTree as ET
import pandas as pd 
import numpy as np 

from ..utils.funcutils import  ( 
    savepath_ , 
    sanitize_fdataset, 
    )   
from .._typing import ( 
    List,
    Optional, 
    DataFrame,
    )
from ..utils.hydroutils import ( 
    writef,  
    exportdf, 
    categorize_flow
    ) 
from ..exceptions import ( 
    FileHandlingError, 
    FeatureError, 
   )
from ..utils.gistools import ( 
    ll_to_utm, 
    project_point_ll2utm
    )
from ..utils.coreutils import _is_readable 
from watex._watexlog import watexlog 

__all__=['GeoFeatures', 'FeatureInspection'] 

__docformat__='restructuredtext' 
_logger =watexlog().get_watex_logger(__name__)


class GeoFeatures:
    """ Features class.

    Deals with Electrical Resistivity Profile (ERP), Vertical Electrical
    Sounding (VES), Geological (Geol) data and Borehole data (Boreh), and
    sets all feature values of the different investigation sites.

    The features are composed of:

    - `erp` data get from :class:`watex.methods.erp.ERP_colection`
    - `geol` data obtained from :class:`watex.geology.geology.Geology`
    - `boreh` data get from :class:`watex.geology.geology.Borehole`

    Arguments
    -----------
    *features_fn* : str, Path_like
        File of the geoelectrical features.
    *ErpColObjs*: object
        Collection object from `erp` survey lines.
    *vesObjs*: object
        Collection object from vertical electrical sounding (VES) curves.
    *geoObjs*: object
        Collection object from `geol` class.
        See :doc:`watex.geology.geology.Geology`.
    *boreholeObjs*: object
        Collection of boreholes of all investigation sites.
        Refer to :doc:`watex.geology.geology.Borehole`

    Every keyword argument given at construction is stored as an attribute.
    Attributes of interest:

    ============  ========================  ===================================
    Attributes    Type                      Description
    ============  ========================  ===================================
    df_           pd.core.DataFrame         Container of all features composed
                                            of :attr:`~GeoFeatures.featureLabels_`
    id_           array_like                ID of each survey location.
    site_names_   array_like                Survey location names.
    ErpColObjs    obj                       ERP `erp` class object.
    vesObjs       obj                       VES `ves` class object.
    geoObjs       obj                       Geology `geol` class object.
    borehObjs     obj                       Borehole `boreh` class obj.
    ============  ========================  ===================================

    Notes
    ------
    Be sure to not miss any coordinates file. Indeed, each selected anomaly
    should have a borehole performed at that place for supervised learning.
    That means each selected anomaly referenced by location coordinates and
    `id` on `erp` must have its own `ves`, `geol` and `boreh` data. For
    further details about the collection classes, please refer to the
    documentation of the classes aforementioned.

    Examples
    ---------
    >>> from watex.cases.features import GeoFeatures
    >>> data ='data/geodata/main.bagciv.data.csv'
    >>> featObj =GeoFeatures().fit(data )
    >>> featObj.id_
    Out[114]:
    array(['e0000001', 'e0000002', 'e0000003', 'e0000004', 'e0000005',
           'e0000006', 'e0000007'], dtype='<U8')
    >>> featObj.site_names_[:7]
    Out[115]: array(['b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7'], dtype=object)
    """

    featureLabels_ = [
        'id',
        'east',
        "north",
        'power',
        "magnitude",
        "shape",
        "type",
        "sfi",
        'ohmS',
        'lwi',
        'geol',
        'flow',
    ]

    # Attribute read by the class -> alias historically set by ``fit``.
    _collection_attrs = (
        ('ErpColObjs', 'erpObj'),
        ('vesObjs', 'vesObj'),
        ('geoObjs', 'geoObj'),
        ('borehObjs', 'borehObj'),
    )

    def __init__(self, **kws):
        self._logging = watexlog().get_watex_logger(self.__class__.__name__)
        for key, value in kws.items():
            setattr(self, key, value)

    @property
    def data(self):
        """ Feature data as stored by the setter, or None when not set yet. """
        return getattr(self, 'data_', None)

    @data.setter
    def data(self, data):
        """ Store whatever `_is_readable` returns for `data` in `data_`. """
        self.data_ = _is_readable(data)

    def fit(self,
            data: Optional[str | DataFrame] = None,
            geoObj=None,
            erpObj=None,
            vesObj=None,
            boreholeObj=None,
            **kws):
        """ Read the inputs and populate the feature attributes.

        :param data: Features file or dataframe, passed to the `data` setter.
        :param geoObj: Geology collection, stored as `geoObjs`.
        :param erpObj: ERP collection, stored as `ErpColObjs`.
        :param vesObj: VES collection, stored as `vesObjs`.
        :param boreholeObj: Borehole collection, stored as `borehObjs`.
        :param kws: Accepted for compatibility; not used.

        :return: `self`, with `df_`, `id`, `id_` and `site_names_` set and
            one attribute per site name holding its row of `df_`.
        :raises FeatureError: when no data is available and at least one of
            the four collections is missing.
        """
        if data is not None:
            self.data = data

        given = dict(zip(
            [attr for attr, _ in self._collection_attrs],
            [erpObj, vesObj, geoObj, boreholeObj]))
        n_given = 0
        for attr, alias in self._collection_attrs:
            obj = given[attr]
            if obj is None:
                continue
            # keep the historical alias and set the name the class reads.
            setattr(self, alias, obj)
            setattr(self, attr, obj)
            n_given += 1

        have_all = all(getattr(self, attr, None) is not None
                       for attr, _ in self._collection_attrs)

        if self.data is None and not have_all:
            raise FeatureError(
                'Features file is not given. Please provide specific'
                ' objects from `erp`, `ves`, `geology` and `borehole` data.'
                ' Call each specific collection class to build each'
                ' collection features.')

        if self.data is not None:
            self._fit_from_data()

        # the collections win over the file when all four are passed to fit,
        # and are the only source when no file is available.
        if n_given == 4 or (self.data is None and have_all):
            self._fit_from_collections()

        self.df_.set_index('id', inplace=True)
        # builtin float: the `np.float` alias no longer exists in NumPy.
        self.df_ = self.df_.astype({'east': float,
                                    'north': float,
                                    'power': float,
                                    'magnitude': float,
                                    'sfi': float,
                                    'ohmS': float,
                                    'lwi': float,
                                    'flow': float
                                    })

        # populate site names attributes
        for site in self.site_names_:
            if not hasattr(self, site):
                setattr(self, site, ID()._findFeaturePerSite_(
                    _givenATTR=site,
                    sns=self.site_names_,
                    df_=self.df_,
                    id_=self.id,
                    id_cache=self.id_))
        return self

    def _fit_from_data(self):
        """ Build `df_`, `site_names_`, `id` and `id_` from `data`. """
        self.sanitize_fdataset()
        if 'id' not in self.df_.columns:
            # force to set id
            self.df_ = self.df_.rename(columns={'name': 'id'})
        self.site_names_ = np.copy(self.df_['id'].to_numpy())

        if self.utm_flag_ == 0:
            self._latlon_to_utm()

        # Keep location names
        # NOTE(review): `id()` is the builtin, so these labels derive from the
        # address of a temporary string; they are not stable between runs and
        # may repeat. Kept as is because the intended label is unclear.
        self.df_['id'] = np.array(['e{0}'.format(id(name.lower()))
                                   for name in self.df_['id']])
        self.id = np.copy(self.df_['id'].to_numpy())
        self.id_ = np.array(['e{0:07}'.format(ii + 1)
                             for ii in range(len(self.df_['id']))])
        # rebuild the dataframe from main features
        self.df_ = pd.concat({
            featkey: self.df_[featkey]
            for featkey in self.featureLabels_}, axis=1)

    def _latlon_to_utm(self):
        """ Replace the `lat`/`lon` columns of `df_` by `east`/`north`. """
        lats = self.df_['lat'].to_numpy()
        lons = self.df_['lon'].to_numpy()
        self.easting_ = np.zeros(len(lats), dtype=float)
        self.northing_ = np.zeros_like(self.easting_)

        for ii, (lat, lon) in enumerate(zip(lats, lons)):
            try:
                # NOTE(review): the original passed lon as `lat` and lat as
                # `lon` here, unlike the fallback below; confirm.
                self.utm_zone_, utm_easting, utm_northing = ll_to_utm(
                    reference_ellipsoid=23, lat=lat, lon=lon)
            except Exception:
                utm_easting, utm_northing, self.utm_zone_ = \
                    project_point_ll2utm(lat=lat, lon=lon)
            self.easting_[ii] = utm_easting
            self.northing_[ii] = utm_northing

        self.df_.insert(loc=1, column='east', value=self.easting_)
        self.df_.insert(loc=2, column='north', value=self.northing_)
        self.df_ = self.df_.drop(columns=['lat', 'lon'], errors='ignore')

    def _fit_from_collections(self):
        """ Build `df_`, `id` and `id_` from the four collection objects. """
        erpdf = self.ErpColObjs.erpdf
        vesdf = self.vesObjs.vesdf
        geoldf = self.geoObjs.geoldf
        borehdf = self.borehObjs.borehdf

        # same order as the names in the message below.
        sizes = [len(frame['id'])
                 for frame in (erpdf, vesdf, geoldf, borehdf)]
        if len(set(sizes)) != 1:
            raise FeatureError(
                '`ERP`, `VES`, `Geology` and `Borehole` Features must '
                'have the same length. You give <{0},{1},{2}, and '
                '{3} respectively.'.format(*sizes))

        self.id_ = self.controlObjId(
            erpObjID=erpdf['id'],
            boreObjID=borehdf['id'],
            geolObjID=geoldf['id'],
            vesObjsID=vesdf['id']
        )
        # column-wise concat aligns the frames on their index.
        self.df_ = pd.concat(
            [erpdf,
             vesdf['ohmS'],
             geoldf['geol'],
             borehdf[['lwi', 'flow']]],
            axis=1)
        self.id = erpdf['id'].to_numpy()
        if getattr(self, 'site_names_', None) is None:
            # no feature file was read: fall back on the ERP identifiers.
            self.site_names_ = np.copy(self.id)

    def sanitize_fdataset(self):
        """ Sanitize the feature dataset.

        Recognize the columns provided by the user and rename them according
        to the feature labels :attr:`~.GeoFeatures.featureLabels_`. The
        result is stored in `df_`; `utm_flag_` is set to 1 when a column is
        recognized as `east`, otherwise it stays 0.
        """
        self.utm_flag_ = 0
        option_groups = [
            ['bore', 'for'],
            ['x', 'east'],
            ['y', 'north'],
            ['pow', 'puiss', 'pa'],
            ['magn', 'amp', 'ma'],
            ['shape', 'form'],
            ['type'],
            ['sfi', 'if'],
            ['lat'],
            ['lon'],
            ['lwi', 'wi'],
            ['ohms', 'surf'],
            ['geol'],
            ['flow', 'deb'],
        ]
        labels = [
            'id',
            'east',
            'north',
            'power',
            'magnitude',
            'shape',
            'type',
            'sfi',
            'lat',
            'lon',
            'lwi',
            'ohmS',
            'geol',
            'flow',
        ]

        columns = [str(c).lower() for c in self.data.columns]
        for ii, raw in enumerate(columns):
            # every label is tried against the original header, so the last
            # matching label wins.
            for options, label in zip(option_groups, labels):
                for option in options:
                    if label == 'lwi' and raw.find('eau') >= 0:
                        columns[ii] = label
                        break
                    if re.match(r'^{0}+'.format(option), raw):
                        columns[ii] = label
                        if label == 'east':
                            self.utm_flag_ = 1
                        break

        self.df_ = pd.DataFrame(data=self.data.to_numpy(), columns=columns)

    def from_csv(self, erp_fn):
        """ Check that the CSV file exists.

        Reading of the horizontal distances and apparent resistivity values
        is not implemented yet; the method returns None.

        :param erp_fn: path_like string of CSV file
        :type erp_fn: str
        :raises FileHandlingError: when `erp_fn` is not a file.
        """
        if not os.path.isfile(erp_fn):
            raise FileHandlingError(
                '{} is not a file. Please provide a right file !'.format(
                    erp_fn))
        # TODO: retrieve locations, coordinates values for each pk,
        # horizontal distance in meter and apparent resistivity values.

    def from_xml(self, xml_fn, columns=None):
        """ Collect data from an XML file and build a one-row dataframe.

        The value of column number `ii` is the text of the child tagged
        `columns[ii]` found in the `ii`-th node under the root.

        :param xml_fn: Full path to xml file
        :type xml_fn: str
        :param columns: list of columns of dataset
        :type columns: list
        :return: dataframe of one row labelled by `columns`.
        :rtype: pd.DataFrame
        """
        tree = ET.parse(xml_fn)
        root = tree.getroot()
        # loop the nodes and collect one value per node
        values = [node.find(columns[ii]).text
                  for ii, node in enumerate(root)]
        # `DataFrame.append` no longer exists in pandas; build the row
        # directly instead.
        return pd.DataFrame([values], columns=columns)

    def from_json(self, json_fn, indent=4):
        """ Placeholder for reading features from a json file.

        Not implemented yet; the method returns None.

        :param json_fn: json file
        :type json_fn: str
        """
        pass

    def data_to_numpy(self, data_fn):
        """ Check the data file and its extension.

        Conversion of the features to numpy arrays is not implemented yet;
        the method returns None.

        :param data_fn: path to the data file. When None, the previously
            stored `data_fn_` is used.
        :raises FileHandlingError: when no valid file is available.
        """
        if data_fn is not None:
            self.data_fn_ = data_fn

        data_file = getattr(self, 'data_fn_', None)
        if data_file is None or not os.path.isfile(data_file):
            raise FileHandlingError(
                '{} is not a file. Please provide a '
                'right file !'.format(data_file))
        ex_file = os.path.splitext(data_file)[1]
        if ex_file not in getattr(self, 'dataType', {}):
            # TODO: handling of unknown extensions is not implemented.
            pass

    @staticmethod
    def controlObjId(erpObjID, boreObjID, geolObjID, vesObjsID):
        """ Control whether each selected anomaly from `erp` matches its
        `ves`, `geol` and `borehole` counterparts.

        Names are compared after removal of every ``e`` character.

        :param erpObjID: ERP object IDs. Refer to
            :class:`watex.methods.erp.ERP_collection`
        :param boreObjID: Borehole IDs. Refer to
            :class:`watex.geology.drilling.Borehole`
        :param geolObjID: Geology IDs. Refer to
            :class:`watex.geology.geology.Geology`
        :param vesObjsID: VES object IDs. Refer to
            :class:`watex.methods.electrical.VerticalSounding`

        :return: New survey IDs ``e0000001``, ``e0000002``, ...
        :rtype: np.ndarray
        :raises FeatureError: when the four names of a site differ.
        """
        new_id = []
        for ii, names in enumerate(zip(
                erpObjID, boreObjID, geolObjID, vesObjsID)):
            if len({str(name).replace('e', '') for name in names}) != 1:
                raise FeatureError(
                    "Survey location's name must be the same for `erp`, "
                    '`borehole`, `geol` and `ves` but you are given '
                    '<{0}, {1}, {2}, {3}> respectively. Please rename'
                    ' names to be the same everywhere.'.format(*names))
            new_id.append('e{0:07}'.format(ii + 1))
        # built from a list so that no label gets truncated to the width of
        # the input strings.
        return np.array(new_id, dtype=str)

    @writef(reason='write', from_='df')
    def exportdf(self, refout=None, to=None, savepath=None, **kwargs):
        """ Export the features dataframe to a file, e.g. an Excel sheet or
        a '.json' file.

        The writing itself is delegated to the `writef` decorator, which
        receives the tuple returned here.

        :param refout: Output filename.
        :param to: Export type. Can be `.xlsx` , `.csv`, `.json` and else
        :type to: str
        :param savepath: Path to save the `refout` filename. When None and
            not set before, it is created with `savepath_`.
        :param kwargs: `df` (dataframe to export instead of the fitted
            one), `moduleName` (name given to `savepath_`) and `writeindex`.

        :returns: `(ndf, to, refout, savepath, writeindex)` where `ndf` is a
            copy of the dataframe with index reset and `id` replaced by the
            site names.
        :raises FeatureError: when there is no dataframe to export.

        :Example:

            >>> from watex.cases.features import GeoFeatures
            >>> featObj = GeoFeatures().fit(
            ...    'data/geodata/main.bagciv.data.csv')
            >>> featObj.exportdf(refout='ybro', to='csv')
        """
        df = kwargs.pop('df', None)
        modname = kwargs.pop('moduleName', '_geoFeatures_')
        writeindex = kwargs.pop('writeindex', False)

        if df is not None:
            self.df = df

        for attr in ('to', 'savepath', 'refout'):
            if not hasattr(self, attr):
                setattr(self, attr, None)

        if savepath is not None:
            self.savepath = savepath
        if to is not None:
            self.to = to
        if refout is not None:
            self.refout = refout

        # `fit` stores its results under `df_` and `site_names_`.
        frame = getattr(self, 'df', None)
        if frame is None:
            frame = getattr(self, 'df_', None)
        if frame is None:
            raise FeatureError(
                'No dataframe to export. Call `fit` first or provide `df`.')
        site_names = getattr(self, 'site_names', None)
        if site_names is None:
            site_names = getattr(self, 'site_names_', None)

        # create new data and replace id by site name
        ndf = frame.copy(deep=True)
        ndf.reset_index(inplace=True)
        if site_names is not None:
            ndf['id'] = site_names

        if self.savepath is None:
            self.savepath = savepath_(modname)

        return ndf, self.to, self.refout, self.savepath, writeindex
class ID:
    """ Special class to manage the feature IDs.

    Each `erp`, `ves`, `geol` and `borehole` name can be an attribute of
    each collection class. Each survey line is identified with its common
    `ID` and points to the same name.

    :param _givenATTR: Station or location name considered as the new
        attribute name.
    :type _givenATTR: str

    :param sns: Station names from `erp`, `ves` and `geol`.
    :type sns: array_like

    :param id_: Identification site number, one per station.

    :param id_cache: New id of each station, ``e`` followed by the 1-based
        position of the station in `df_`.

    :param df_: Features dataFrame.

    :Example:

        >>> from watex.cases.features import GeoFeatures
        >>> featObj =GeoFeatures().fit(
        ...    'data/geodata/main.bagciv.data.csv')
        >>> featObj.b1

    where ``b1`` is the survey name and `featObj.b1` is the row extracted
    from the features dataFrame.

    :Note: To extract data from a station location name, be sure to write
        the right name. If not, an `AttributeError` occurs.
    """

    def __init__(self, **kwargs):
        self._logging = watexlog().get_watex_logger(self.__class__.__name__)
        for key, value in kwargs.items():
            setattr(self, key, value)

        if hasattr(self, '_givenATTR'):
            # the remaining inputs are read from the attributes set above.
            self._findFeaturePerSite_(self._givenATTR)

    def _findFeaturePerSite_(self, _givenATTR, sns=None, df_=None, id_=None,
                             id_cache=None):
        """ Select the row of the features dataframe of a given station.

        `_givenATTR` is compared, ignoring case, to the entries of `sns`,
        `id_` and `id_cache`. The row of the first matching station is
        stored in `select_` and returned. Arguments left to None are read
        from the attributes of the same name.

        :param _givenATTR: Station or location name.
        :param sns: Station names.
        :param df_: Features dataframe.
        :param id_: Identification site numbers.
        :param id_cache: New station ids; the digits after ``e`` give the
            1-based row position in `df_`.

        :return: Selected part of the dataframe (one row).
        :rtype: pd.DataFrame
        :raises AttributeError: when no station matches `_givenATTR`.
        """
        for attr, value in (('_givenATTR', _givenATTR),
                            ('df_', df_),
                            ('sns', sns),
                            ('id_cache', id_cache),
                            ('id_', id_)):
            # an explicit argument always wins over a stored attribute.
            if value is not None or not hasattr(self, attr):
                setattr(self, attr, value)

        target = str(self._givenATTR).lower()
        for name, site_id, cached_id in zip(
                self.sns, self.id_, self.id_cache):
            candidates = (str(name).lower(),
                          str(site_id).lower(),
                          str(cached_id).lower())
            if target in candidates:
                position = int(str(cached_id).replace('e', '')) - 1
                self.select_ = self.df_.iloc[[position]]
                return self.select_

        raise AttributeError(
            'Station {0!r} not found among the site names or '
            'ids.'.format(self._givenATTR))
[docs] class FeatureInspection: """ Summarizes the flow features. It deals with data features categorization. When numericall values are provided standard `qualitative` or `quantitative` analysis is performed. Parameters ----------- *data*: str or pd.core.DataFrame Path-like object or pandas Dataframe. Must contain the main parameters including the `target`. **tname**:str The tname for predicting purposes. Here for groundwater exploration, we specify the name of the target as ``flow``. **flow_classes**: list or array_like The way to classify the flow. Provide the main specific values to convert the categorial trends to numerical values. Different projects have different tnameing flow rate. Might specify either for village hydraulic, or improved village hydraulic or urban hydraulics. **drop_columns**: list items for dropping. To analyse the data, we can drop some specific columns to not corrupt data analysis. In formal dataframe collected straighforwardly from :class:`~features.GeoFeatures`,the default `drop_columns` refer to coordinates positions as : ['east', 'north']. **mapflow: bool, if set to True, value in the target columns should map to categorical values. Commonly the flow rate values are given as a trend of numerical values. For a classification purpose, flow rate must be converted to categorical values which are mainly refered to the type of types of hydraulic. Mostly the type of hydraulic system is in turn tided to the the number of the living population in a specific area. For instance, flow classes can be ranged as follow: - FR = 0 is for dry boreholes - 0 < FR ≤ 3m3/h for village hydraulic (≤2000 inhabitants) - 3 < FR ≤ 6m3/h for improved village hydraulic(>2000-20 000inhbts) - 6 <FR ≤ 10m3/h for urban hydraulic (>200 000 inhabitants). Note that this flow range is not exhaustive and can be modified according to the type of hydraulic required on the project. **set_index**: bool, condired a column as dataframe index. 
If set to ``True``, please provided the `col_name`, otherwise it should be the ``id`` as as a default columns item. **sanitize**: polish the data and remove inconsistent columns in the data which are not refer to the predicting features. It is able to change for instance the french name of water ``eau`` to 'water` wich is related to the value of water inflow features ``lwi``. This could be usefull when the data is given as a Path-Like object and features are not described correctly in the case of groundwater. Default is ``False`` Examples -------- >>> from watex.cases.features import FeatureInspection >>> data = 'data/geodata/main.bagciv.data.csv' >>> fobj = FeatureInspection().fit(data) >>> fobj.data_.columns Out[117]: Index(['num', 'name', 'east', 'north', 'power', 'magnitude', 'shape', 'type', 'sfi', 'ohmS', 'lwi', 'geol', 'flow'], dtype='object') """ def __init__(self, tname:str ='flow' , mapflow:bool =True, sanitize:bool =False, flow_classes: List[float] = [0., 1., 3.], set_index:bool = False, col_name: str = None, **kws): self._logging =watexlog().get_watex_logger(self.__class__.__name__) self.tname =tname self.set_index =set_index self.sanitize =sanitize self.mapflow =mapflow self.flow_classes_=flow_classes self.col_name= col_name self.index_col_id =kws.pop('col_id', 'id') self.drop_columns =kws.pop('drop_columns', None) self.cache_ =None for key in list(kws.keys()): setattr(self, key, kws[key]) @property def flow_classes(self): return self.flow_classes_ @flow_classes.setter def flow_classes(self, flow_classes): """ When tnameing features is numerically considered, The `setter` try to classified into different classes provided. """ if flow_classes is None: flow_classes = [0., 3., 6., 10.] 
warnings.warn("default flow classes argument range is set to" f"'{flow_classes}' m3/h !") self.logging.info (' The flow classes argument is set to default' ' values : <{0},{1},{2} and {3} m3/h>.'.format( *list( flow_classes))) try: self.flow_classes_ = np.array(flow_classes).astype(float) except: raise FeatureError (f"Not supported flow classes " f"arguments: {flow_classes}") @property def data (self): """ Control the Feature-file extension provide. Usefull to select pd.DataFrame construction.""" return self.data_ @data.setter def data(self, data) : """ Get the Features file and seek for pd.Core methods construction.""" self.data_ = _is_readable(data ) @property def cache(self): """ Generate cache `df_` for all eliminate features and keep on new pd.core.frame.DataFrame. """ return self.cache_ @cache.setter def cache(self, cache): """ Holds the remove features and keeps on new dataframe :param cache: iterable object containing the item to drop""" try: temDict={'id': self.data_['id'].to_numpy()} except KeyError: # if `id` not in colums try 'name' temDict={'id': self.data_['name'].to_numpy()} self.index_col_id ='name' temc=[] if isinstance(cache, str): cache = [cache] if self.drop_columns is not None: if isinstance(self.drop_columns, str) : self.drop_columns =[self.drop_columns] cache = cache + self.drop_columns if isinstance(self.tname, str): cache.append(self.tname) else : cache + list(self.tname) for cc in cache: if cc not in self.data_.columns: temDict[cc]= np.full((self.data_.shape[0],), np.nan) temc.append(cc) else: if cc=='id': continue # id is already in dict temDict [cc]= self.data_[cc].to_numpy() # check into the dataset whether the not provided features exists. 
if self.data_ is not None : df_= self.data_.copy() df_.reset_index(inplace= True) if 'id' in df_.columns: temDict['id_']= df_['id'].to_numpy() if len(temc) !=0 : for ad in temc: if ad in df_.columns: temDict[ad]= df_[ad] self.cache_= pd.DataFrame(temDict) self.col_name = self.col_name or 'id_' if self.col_name in self.cache_.columns: self.cache_.set_index('id_', inplace=True)
[docs] def fit(self, data: str | DataFrame): """ Main goals of this method is to fit and classify the different flow classes in the dataset. However by default, four(04) flow classes are considered according to the reference below Parameters ----------- *data*: str or pd.core.DataFrame Path-like object or pandas Dataframe. Must contains of the main parameters including the `tname` the tname. Returns -------- object: :class:`~.FeatureInspection` object Examples --------- >>> from watex.bases.features import FeatureInspection >>> data = 'data/geodata/main.bagciv.data.csv' >>> fobj = FeatureInspection() >>> fobj.fit(data) >>> fobj.data.iloc[1:3 , :] ... num name power magnitude ... ohmS lwi geol flow 1 2 b2 70.0 142.0 ... 1135.551531 21.406531 GRANITES FR1 2 3 b3 80.0 87.0 ... 767.562500 0.000000 GRANITES FR1 Notes -------- The paper mentions 04 types of hydraulic according to the population demand and the number of living inhabitants. The hydraulic system are defined as: * FR = 0 is for dry boreholes * 0 < FR ≤ 3m3/h for village hydraulic (≤2000 inhabitants) * 3 < FR ≤ 6m3/h for improved village hydraulic(>2000-20 000inhbts) * 6 <FR ≤ 10m3/h for urban hydraulic (>200 000 inhabitants). The flow classes can be modified according to the type of hydraulic proposed for the project. References ------------ .. [1] CIEH. (2001). L’utilisation des méthodes géophysiques pour la recherche d’eaux dans les aquifères discontinus. Série Hydrogéologie, 169. """ self.data = data if self.data_ is None: raise FeatureError("NoneType can not be a data of features.") if self.sanitize is True : self.data_ , utm_flag = sanitize_fdataset(self.data_) # test_df = self._df.copy(deep=True) # df = self.data_.copy() if self.drop_columns is not None: if isinstance(self.drop_columns, str): self.drop_columns = [self.drop_columns] if len(set(list(self.data.columns)).intersection( set(self.drop_columns))) !=len(self.drop_columns): raise FeatureError ( 'Drop values are not found on dataFrame columns. 
' 'Please provide the right names for droping.') self.cache = self.drop_columns self.data_.drop(columns = self.drop_columns, inplace =True) if self.mapflow is True : self.data_[self.tname]= categorize_flow( target= self.data_[self.tname], flow_values =self.flow_classes) if self.set_index : # id_= [name for name in self.df.columns if name =='id'] if self.index_col_id !='id': self.data_=self.data_.rename(columns = {self.index_col_id:'id'}) self.index_col_id ='id' try: self.data_.set_index(self.index_col_id, inplace =True) except KeyError : # force to set id self.data_=self.data.rename(columns = {'name':'id'}) self.index_col_id ='id' # self.df.set_index('name', inplace =True) if self.tname =='flow': self.data_ =self.data_.astype({ 'power':np.float, 'magnitude':np.float, 'sfi':np.float, 'ohmS': np.float, 'lwi': np.float, } ) return self
[docs] def writedf(self, df=None , refout:str =None, to:str =None, savepath:str =None, modname:str ='_anEX_', reset_index:bool =False): """ Write the analysis `df`. Refer to :func:`watex.decorators.exportdf` for more details about the arguments ``refout``, ``to``, ``savepath``, ``modename`` and ``rest_index``. :Example: >>> from watex.analysis.bases.features import FeatureInspection >>> slObj =FeatureInspection( ... data_fn='data/geo_fdata/BagoueDataset2.xlsx', ... set_index =True) >>> slObj.writedf() """ for nattr, vattr in zip( ['df', 'refout', 'to', 'savepath', 'modname', 'reset_index'], [df, refout, to, savepath, modname, reset_index]): if not hasattr(self, nattr): setattr(self, nattr, vattr) exportdf(df= self.df , refout=self.refout, to=self.to, savepath =self.savepath, reset_index =self.reset_index, modname =self.modname)