# -*- coding: utf-8 -*-
# License: BSD-3-Clause
# Author: LKouadio <etanoyau@gmail.com>
"""
:mod:`~watex.methods.hydro` computes Hydrogeological parameters of aquifer
that are the essential and crucial basic data in the designing and
construction progress of geotechnical engineering and groundwater dewatering.
"""
from __future__ import (
division,
annotations
)
import warnings
from abc import ABC, abstractclassmethod, abstractmethod
from .._docstring import (
    _core_docs,
    DocstringComponents
)
from ..exceptions import (
    NotFittedError,
    StrataError,
    kError,
    AquiferGroupError
)
from ..utils.hydroutils import (
    find_aquifer_groups,
    find_similar_labels,
    get_aquifer_sections,
    reduce_samples,
    select_base_stratum,
    make_MXS_labels,
    predict_NGA_labels
)
from ..utils.funcutils import (
    sanitize_frame_cols,
    to_numeric_dtypes,
    smart_strobj_recognition,
    repr_callable_obj,
    is_in_if,
)
from ..utils.validator import check_array
from .._watexlog import watexlog
# Names exported by ``from ... import *``.
__all__ = [
    "Hydrogeology",
    "AqSection",
    "AqGroup",
    "MXS",
    "Logging",
]

#-----------------------
# Parameter descriptions specific to this module. They are wrapped in a
# ``DocstringComponents`` and interpolated into the class docstrings below
# through ``{params.base.<name>}`` placeholders.
_base_params = {
    "aqname": """
aqname: str, optional,
Name of aquifer group column. `aqname` allows to retrieve the
aquifer group `arr_aq` value in a specific dataframe. Commonly
`aqname` needs to be supplied when a dataframe is passed as a positional
or keyword argument. Note that it is not mandatory to have a group of
aquifer in the log data. It is needed only if the label similarity
needs to be calculated.
""",
    "sname": """
sname: str, optional
Name of column in the dataframe that contains the strata values.
Dont confuse 'sname' with 'stratum' which is the name of the valid
layer/rock in the array/Series of strata.
""",
}

# ``core`` holds the package-wide parameter docs, ``base`` the ones above.
_param_docs = DocstringComponents.from_nested_components(
    core=_core_docs["params"],
    base=DocstringComponents(_base_params),
)
#------------------------
class HData(ABC):
    # The user-facing class docstring is assigned to ``HData.__doc__`` right
    # after the class body so that the shared parameter descriptions can be
    # interpolated into it.

    @abstractmethod
    def __init__(
        self,
        kname=None,
        zname=None,
        aqname=None,
        sname=None,
        verbose=0,
    ):
        # Declared with ``abstractmethod`` and not ``abstractclassmethod``:
        # the latter turns ``__init__`` into a classmethod, so a subclass
        # calling ``super().__init__(...)`` would bind the *class* and store
        # every parameter below as class-level state shared by all instances.
        self._logging = watexlog.get_watex_logger(self.__class__.__name__)
        self.kname = kname
        self.zname = zname
        self.aqname = aqname
        self.sname = sname
        self.verbose = verbose

    def fit(self, data, **fit_params):
        """
        Fit Hydro-data and populate attributes.

        Note that each column of the dataframe can be retrieved as an
        attribute value. The attribute maker replaces all spaces in the
        items, if they exist in the data columns, with '_'. For instance,
        retrieving the 'layer thickness' item of the data should be done
        with 'layer_thickness' like::

            >>> from watex.datasets import load_hlogs
            >>> from watex.methods.hydro import MXS  # a concrete subclass
            >>> h = MXS().fit(load_hlogs().frame)
            >>> h.layer_thickness  # for retrieving 'layer thickness'

        Parameters
        ----------
        data : Dataframe of shape (n_samples, n_features)
            where `n_samples` is the number of data, expected to be the data
            collected at different depths and `n_features` is the number of
            columns (features) that supposed to be plot.
            Note that `X` must include the ``depth`` columns. If not given a
            relative depth should be created according to the number of
            samples that composes `data`.
        fit_params : dict,
            Additional keyword arguments passed to
            :func:`~watex.utils.funcutils.to_numeric_dtypes`.

        Returns
        -------
        self : object
            The fitted instance, for method chaining. After fitting,
            ``data_``, ``feature_names_in_``, ``k_``, ``z_``, ``aq_`` and
            ``s_`` are set, plus one attribute per data column.
        """
        data = check_array(
            data,
            force_all_finite="allow-nan",
            dtype=object,
            input_name="Data",
            to_frame=True,
        )
        data = sanitize_frame_cols(data, fill_pattern='_')
        self.data_, nf, cf = to_numeric_dtypes(
            data,
            return_feature_types=True,
            verbose=self.verbose,
            **fit_params
        )
        self.feature_names_in_ = nf + cf
        if len(cf) != 0:
            # sanitize the categorical values
            for c in cf:
                self.data_[c] = self.data_[c].str.strip()

        named_columns = zip(
            ("k", "z", "aq", "s"),
            (self.kname, self.zname, self.aqname, self.sname),
        )
        for name, val in named_columns:
            if val:
                requested = val
                val = is_in_if(
                    list(self.data_.columns), val, error='ignore')
                if val is None and self.verbose:
                    warnings.warn(
                        f" Invalid '{name}name'={requested!r}. Name not "
                        "found in the given dataset. None is set "
                        "instead.")
            # Always set ``<name>_`` (possibly to a falsy value) so that
            # later ``self.k_ is None``-style checks never hit a missing
            # attribute.
            setattr(
                self, f"{name}_", self.data_[val[0]] if val else val)

        # Expose every column as an attribute of the instance.
        for name in self.data_.columns:
            setattr(self, name, self.data_[name])
        return self

    def squeeze_data(self, strategy="average", **rs_kws):
        """ Compressed data by sample reducing.

        To compress many boreholes data, it is recommended to use
        :func:`get_unique_section`.

        The strata column name (`sname`), as well as `zname` and `kname`,
        are taken from the instance, not from the arguments of this method.

        Parameters
        ----------
        strategy : str , default='average' or 'mean',
            strategy used to select or compute the numerical data into a
            singular series. It can be ['naive']. In that case , a single
            serie if randomly picked up into the base strata data.
        rs_kws : dict,
            keyword arguments passed to
            :func:`~watex.utils.hydroutils.reduce_samples`

        Returns
        -------
        sqdat : pandas.dataframes
            new dataframe with reducing samples (first item returned by
            ``reduce_samples``).

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called yet.
        StrataError
            If the instance attribute ``sname`` is None.
        """
        self.inspect
        if self.sname is None:
            raise StrataError(
                "'sname' cannot be none for data compressing. Refer to"
                " :func:`~watex.utils.hydroutils.reduce_samples` for"
                " pure examples.")
        sqdat = reduce_samples(
            self.data_,
            sname=self.sname,
            zname=self.zname,
            kname=self.kname,
            strategy=strategy,
            **rs_kws
        )[0]
        return sqdat

    def get_base_stratum(self, stratum=None):
        """Select the base stratum.

        Parameters
        ----------
        stratum : str, optional
            Name of the base stratum. Must be self contain as an item of the
            strata data. Note that if `stratum` is passed, the auto-detection
            of base stratum is not triggered. It returns the same stratum.

        Returns
        -------
        base_stratum : str
            the most recurrent stratum in the data. The value is also
            stored in the attribute ``base_stratum_``.

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called yet.
        """
        self.inspect
        self.base_stratum_ = select_base_stratum(
            self.data_,
            sname=self.sname,
            stratum=stratum,
            return_counts=False,
            return_rate=False,
        )
        return self.base_stratum_

    @property
    def inspect(self):
        """ Inspect object whether is fitted or not.

        Returns 1 when the attribute ``data_`` exists, otherwise raises
        :class:`NotFittedError`.
        """
        msg = ("{obj.__class__.__name__} instance is not fitted yet."
               " Call 'fit' with appropriate arguments before using"
               " this method"
               )
        if not hasattr(self, 'data_'):
            raise NotFittedError(msg.format(obj=self))
        return 1

    def __repr__(self):
        """ Pretty format for programmer guidance following the API... """
        t = ("kname", "zname", "aqname", "sname", "verbose")
        fields = ', '.join(f"{k}={getattr(self, k)!r}" for k in t)
        # Build the text in a single f-string: running ``str.format`` on the
        # already-interpolated values would choke on any value that
        # contains '{' or '}'.
        return f"<{self.__class__.__name__!r}:{fields}>"

    def __getattr__(self, name):
        # Only reached when the regular attribute lookup fails; the
        # module-level helper raises AttributeError with a hint.
        _getattr_(self, name)
# The class docstring is built here, not inline, so that the shared
# parameter descriptions held by ``_param_docs`` can be interpolated.
HData.__doc__ = """\
Hydro-Log data. Abstract base class that cannot be instantiated.

Hydro-log data is a mixed data composed of logging data, borehole data
and geological data. To use only the logging data, it is recommended to use
:class:`~.watex.methods.hydro.Logging` instead.

Parameters
----------
{params.core.kname}
{params.core.zname}
{params.base.aqname}
{params.base.sname}
""".format(params=_param_docs)
class AqSection(HData):
    # The class docstring is assigned to ``AqSection.__doc__`` after the
    # class body (it interpolates shared parameter descriptions).

    def __init__(
        self,
        aqname=None,
        kname=None,
        zname=None,
        **kws
    ):
        super().__init__(
            kname=kname,
            aqname=aqname,
            zname=zname,
            **kws
        )

    def findSection(self, z=None, depth_unit="m"):
        """ Find aquifer valid section (upper and lower section ).

        Parameters
        ----------
        z : array-like 1d, pandas.Series
            Array of depth or a pandas series that contains the depth values.
            Two dimensional array or more is not allowed. However when `z`
            is given as a dataframe and `zname` is not supplied, an error
            raises since `zname` is used to fetch and overwritten `z`
            from the dataframe.
        depth_unit : str, default='m'
            Unit label; only used in the message printed when ``verbose``
            is truthy.

        Returns
        -------
        self.section_ : list of float
            valid upper and lower section in SI units (m) if depth values are
            given in meters.

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called yet.
        """
        self.inspect
        # Only the first item returned by the helper (the section itself)
        # is kept.
        self.section_ = get_aquifer_sections(
            self.data_,
            zname=self.zname,
            kname=self.kname,
            return_data=False,
            return_index=False,
            z=z,
        )[0]
        if self.verbose:
            print("### The valid section of aquifer is {} to {} {}."
                  .format(self.section_[0], self.section_[-1],
                          depth_unit)
                  )
        return self.section_
# Docstring built after the class body so that the shared parameter
# descriptions from ``_param_docs`` can be interpolated.
AqSection.__doc__ = """\
Aquifer section class.

Get the section of each aquifer from dataframe.

The unique section 'upper' and 'lower' is the valid range of the whole
data to consider as a valid data. Indeed, the aquifer section computing
is necessary to shrink the data of the whole boreholes. Mostly the data
from the section is considered the valid data as the predictor Xr. Out of
the range of the aquifer section, data can be discarded or compressed to
top Xr.

Parameters
----------
{params.base.aqname}
{params.core.kname}
{params.core.zname}
""".format(params=_param_docs)
class MXS(HData):
    # The class docstring is assigned to ``MXS.__doc__`` after the class
    # body (it interpolates shared parameter descriptions).

    def __init__(
        self,
        kname=None,
        aqname=None,
        threshold: float = None,
        method: str = "naive",
        trailer: str = "*",
        keep_label_0: bool = False,
        random_state: int = 42,
        n_groups: int = 3,
        sep: str = None,
        prefix=None,
        **kws
    ):
        super().__init__(
            kname=kname,
            aqname=aqname,
            **kws
        )
        self.threshold = threshold
        self.method = method
        self.n_groups = n_groups
        self.trailer = trailer
        self.keep_label_0 = keep_label_0
        self.random_state = random_state
        self.sep = sep
        self.prefix = prefix

    def predictNGA(
        self,
        n_components: int = 2,
        return_label=False,
        **NGA_kws
    ):
        """ Predicts Naive Group of Aquifer from Hydro-Log data.

        Parameters
        ----------
        n_components : int, default=2
            Number of dimension to preserve. If`n_components` is ranged
            between float 0. to 1., it indicates the number of variance
            ratio to preserve. If ``None`` as default value the number of
            variance to preserve is ``95%``.
        return_label : bool, default=False
            If `True`, return the NGA label predicted, otherwise return
            :class:`~.MXS` instanciated object. if ``False``, NGA label
            can be fetch using the attribute
            :attr:`watex.hydro.MXS.yNGA_`
        NGA_kws : dict,
            keyword argument passed to
            :func:`watex.utils.predict_NGA_labels`

        Returns
        -------
        yNGA_ or self : arraylike-1d of naive group of aquifer or
            :class:`~.MXS` instanciated object.

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called yet.

        Example
        -------
        >>> from watex.datasets import load_hlogs
        >>> from watex.methods.hydro import MXS
        >>> hdata = load_hlogs ().frame
        >>> # drop the 'remark' columns since there is no valid data
        >>> hdata.drop (columns ='remark', inplace=True)
        >>> mxs =MXS (kname ='k').fit(hdata) # specify the 'k' column
        >>> y_pred = mxs.predictNGA(return_label=True )
        >>> y_pred [-12:]
        Out[52]: array([1, 3, 1, 3, 3, 3, 3, 1, 3, 3, 3, 3])
        """
        self.inspect
        # Imported lazily, as in the original implementation.
        from ..analysis.dimensionality import nPCA
        from ..utils.mlutils import (
            naive_imputer,
            naive_scaler
        )
        # Keep the numerical features only, then impute -> scale -> reduce.
        X = to_numeric_dtypes(
            self.data_,
            pop_cat_features=True,
            verbose=self.verbose
        )
        X = nPCA(
            naive_scaler(naive_imputer(X)),
            n_components=n_components,
            random_state=self.random_state,
            view=False,
            return_X=True,
            plot_kws=dict(),
        )
        self.yNGA_, self.cluster_centers_ = predict_NGA_labels(
            X,
            n_clusters=self.n_groups,
            return_cluster_centers=True,
            keep_label_0=self.keep_label_0,
            random_state=self.random_state,
            **NGA_kws
        )
        return self.yNGA_ if return_label else self

    def makeyMXS(
        self,
        y_pred=None,
        func: callable = None,
        categorize_k=False,
        default_func=False,
        **mxs_kws
    ):
        r""" Construct the MXS target :math:`y*`

        Parameters
        ----------
        y_pred : Array-like 1d, pandas.Series
            Array composing the valid NGA labels. Note that NGA labels is a
            predicted labels mostly using the unsupervising learning.
            When omitted, the labels stored in ``yNGA_`` by
            :meth:`predictNGA` are used.
            :seealso: :func:`~predict_NGA_labels` for further details.
        func : callable
            Function to specifically map the permeability coefficient column
            in the dataframe of serie. If not given, the default function
            can be enabled instead from param `default_func`.
        categorize_k : bool, default=False
            Forwarded unchanged to
            :func:`~.watex.utils.make_MXS_labels`; refer to that function
            for its exact effect.
        default_func : bool, default=False
            Default function for mapping k is setting to ``True``. Note
            that, this could probably not fitted your own data. So it is
            recommended to provide your own function for mapping 'k'.
            However the default 'k' mapping is given as follow:

            - k0 {0}: k = 0
            - k1 {1}: 0 < k <= .01
            - k2 {2}: .01 < k <= .07
            - k3 {3}: k> .07

        mxs_kws : dict,
            Additional keyword arguments passed to
            :func:`~.watex.utils.make_MXS_labels`.

        Returns
        -------
        MXS.mxs_labels_ : array-like 1d
            array like of MXS labels

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called yet.
        kError
            If no permeability coefficient data is available (``k_`` is
            None).
        AquiferGroupError
            If `y_pred` is None and :meth:`predictNGA` has not been called.

        Example
        -------
        >>> from watex.datasets import load_hlogs
        >>> from watex.methods.hydro import MXS
        >>> hdata = load_hlogs ().frame
        >>> # drop the 'remark' columns since there is no valid data
        >>> hdata.drop (columns ='remark', inplace=True)
        >>> mxs =MXS (kname ='k').fit(hdata) # specify the 'k'columns
        >>> # we can predict the NGA labels and yMXS with single line
        >>> # of code snippet using the default 'k' classification.
        >>> ymxs = mxs.predictNGA().makeyMXS(categorize_k=True, default_func=True)
        >>> mxs.yNGA_[:7]
        ... array([2, 2, 2, 2, 2, 2, 2])
        >>> ymxs[:7]
        Out[40]: array([22, 22, 22, 22, 22, 22, 22])
        >>> mxs.mxs_group_classes_
        Out[56]: {1: 1, 2: 22, 3: 3} # transform classes
        >>> mxs.mxs_group_labels_
        Out[57]: (2,)
        >>> # **comment:
            # only the label '2' is tranformed to '22' since
            # it is the only one that has similariry with the true label 2
        """
        self.inspect
        if self.k_ is None:
            raise kError("'k' data for permeability coefficient cannot"
                         " be None. Specify the name of the column 'kname'"
                         " that fits the permeability coefficient values"
                         " in the hydro-log dataset."
                         )
        if y_pred is None:
            # Fall back on the labels computed by ``predictNGA``, if any.
            if not hasattr(self, 'yNGA_'):
                raise AquiferGroupError(
                    "y_pred for Naive Group of Aquifer (NGA) cannot be "
                    " None. Use :meth:`~predictNGA` method or"
                    " :func:`~.watex.utils.predict_NGA_labels` to"
                    " predict NGA labels first."
                )
            y_pred = self.yNGA_

        # Named ``mxs_obj`` (not ``MXS``) to avoid shadowing the class name.
        mxs_obj = make_MXS_labels(
            self.k_,
            y_pred,
            threshold=self.threshold,
            trailer=self.trailer,
            method=self.method,
            return_groups=False,
            return_obj=True,
            kname=self.kname,
            keep_label_0=self.keep_label_0,
            sep=self.sep,
            prefix=self.prefix,
            inplace=False,
            categorize_k=categorize_k,
            default_func=default_func,
            func=func,
            **mxs_kws
        )
        # Mirror every item of the returned object onto this instance.
        for key in mxs_obj.keys():
            setattr(self, key, mxs_obj[key])
        return mxs_obj.mxs_labels_

    def labelSimilarity(
        self,
        func: callable = None,
        categorize_k=False,
        default_func=False,
        **sm_kws
    ):
        """Find label similarities.

        Parameters
        ----------
        func : callable
            Function to specifically map the permeability coefficient column
            in the dataframe of serie. If not given, the default function
            can be enabled instead from param `default_func`.
        categorize_k : bool, default=False
            See the note below.
        default_func : bool, default=False
            Default function for mapping k is setting to ``True``. Note
            that, this could probably not fitted your own data. So it is
            recommended to provide your own function for mapping 'k'.
            However the default 'k' mapping is given as follow:

            - k0 {0}: k = 0
            - k1 {1}: 0 < k <= .01
            - k2 {2}: .01 < k <= .07
            - k3 {3}: k> .07

        sm_kws : dict,
            Additional keyword arguments passed to
            :func:`~.watex.utils.find_similar_labels`.

        Returns
        -------
        similar_labels
            The value returned by
            :func:`~.watex.utils.find_similar_labels`.

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called yet.
        kError
            If ``k_`` is None.
        AquiferGroupError
            If ``aq_`` is None.

        Notes
        -----
        `func`, `categorize_k` and `default_func` are accepted but are
        currently not forwarded to ``find_similar_labels``.
        """
        self.inspect
        msg = ("{0!r} data for {1} cannot be None. Specify the name of the "
               "column {2!r} that fits the {1} values in the hydro-log dataset."
               )
        if self.k_ is None:
            raise kError(
                msg.format("k", "permeability coefficient", "kname"))
        if self.aq_ is None:
            raise AquiferGroupError(
                msg.format("aq", "aquifer groups", "aqname"))

        # NOTE(review): ``func``, ``categorize_k`` and ``default_func`` are
        # unused here -- confirm whether they should be passed along.
        similar_labels = find_similar_labels(
            self.k_,
            self.aq_,
            threshold=self.threshold,
            keep_label_0=self.keep_label_0,
            method=self.method,
            return_groups=False,
            **sm_kws
        )
        return similar_labels
# Docstring built after the class body so that the shared parameter
# descriptions from ``_param_docs`` can be interpolated.
MXS.__doc__ = """\
Mixture Learning Strategy (MXS)

The use of machine learning for k-parameter prediction seems an alternative
way to reduce the cost of data collection thereby saving money. However,
the borehole data comes with a lot of missing k since the parameter is
strongly tied to the aquifer after the pumping test. In other words, the
k-parameter collection is feasible if the layer in the well is an aquifer.
Unfortunately, predicting some samples of k in a large set of missing data
remains an issue using the classical supervised learning methods. We,
therefore propose an alternative approach called a mixture learning
strategy (MXS) to solve these double issues. It entails predicting upstream
a naïve group of aquifers (NGA) combined with the real values k to
counterbalance the missing values and yield an optimal prediction score.
The method, first, implies the K-Means and Hierarchical Agglomerative
Clustering (HAC) algorithms. K-Means and HAC are used for NGA label
predicting necessary the MXS label merging.

Parameters
----------
{params.core.kname}
{params.base.aqname}
threshold: float, default=None
    The threshold from which, label in 'k' array can be considered
    similar than the one in NGA labels 'y_pred'. The default is 'None' which
    means none rule is considered and the high preponderance or occurrence
    in the data compared to other labels is considered as the most
    representative and similar. Setting the rule instead by fixing
    the threshold is recommended especially in a huge dataset.
n_groups : int, default=3
    The number of aquifer n_groups to form as well as the number of
    centroids to generate. If a idea about the number of aquifer group
    in the areas, it should be used instead. However, it is recommended
    to validate this number using the 'elbow plot' or the 'silhouette
    plot' or the Hierarchical Agglomerative Clustering dendrogram.
    Refer to :func:`~watex.utils.plot_elbow` or
    :func:`~.watex.view.plotSilhouette`
    or :func:`~.watex.view.plotDendrogram` for plotting purpose.
keep_label_0: bool, default=False
    The prediction already include the label 0. However, including 0 in
    the predicted label refers to 'k=0' i.e. no permeability coefficient
    equals to 0, which is not True in principle, because all rocks have
    a permeability coefficient 'k'. Here we considered 'k=0' as an undefined
    permeability coefficient. Therefore, '0' , can be exclude since, it can
    also considered as a missing 'k'-value. If predicted '0' is in the target
    it should mean a missing 'k'-value rather than being a concrete label.
    Therefore, to avoid any confusion, '0' is altered to '1' so the value
    `+1` is used to move forward all class labels thereby excluding
    the '0' label. To force include 0 in the label, set `keep_label_0`
    to ``True``.
sep: str, default=''
    Separator between the true labels 'y_true' and predicted NGA labels.
    Sep is used to rewrite the MXS labels. Mostly the MXS labels is a
    combination with the true label of permeability coefficient 'k' and
    the label of NGA to compose new similarity labels. For instance

    >>> true_labels=['k1', 'k2', 'k3'] ; NGA_labels =['II', 'I', 'UV']
    >>> # gives
    >>> MXS_labels= ['k1_II', 'k2_I', 'k3_UV']

    where the separator `sep` is set to ``_``. This happens especially
    when one of the label (NGA or true_labels) is not a numeric datatype
    and a similarity is found between 'k1' and 'II', 'k2' and 'I' and so on.
prefix: str, default=''
    prefix is used to rename the true_labels i.e the true valid-k. For
    instance::

        >>> k_valid =[1, 2, ..] -> k_new = [k1, k2, ...]

    where 'k' is the prefix.
method: str ['naive', 'strict'], default='naive'
    The kind of strategy to compute the representativity of a label
    in the predicted array 'y_pred'. It can also be 'strict'. Indeed:

    - ``naive`` computes the importance of the label by the number of its
      occurrence for this specific label in the array 'y_true'. It does not
      take into account of the occurrence of other existing labels. This
      is useful for unbalanced class labels in `y_true`.
    - ``strict`` computes the importance of the label by the number of
      occurrence in the whole valid `y_true` i.e. under the total of
      occurrence of all the labels that exist in the whole 'arra_aq'.
      This can give a suitable analysis results if the data is not
      unbalanced for each labels in `y_pred`.
trailer: str, default='*'
    The Mixture strategy marker to differentiate the existing class label
    in 'y_true' with the predicted labels 'y_pred' especially when
    the same class labels are also present the true label with the
    same label-identifier name. This useful to avoid any confusion for
    both labels in `y_true` and `y_pred` for better demarcation and
    distinction. Note that if the `trailer` is set to ``None`` and both
    `y_true` and `y_pred` are numeric data, the labels in `y_pred` are
    systematically renamed to be distinct with the ones in the 'y_true'.
    For instance ::

        >>> true_labels=[1, 2, 3] ; NGA_labels =[0, 1, 2]
        >>> # with trailer , MXS labels should be
        >>> MXS_labels= ['0', '1*', '2*', '3'] # 1 and 2 are in true_labels
        >>> # with no trailer
        >>> MXS_labels= [0, 4, 5, 3] # 1 and 2 have been changed to [4, 5]

{params.core.verbose}

Examples
--------
>>> from watex.datasets import load_hlogs
>>> from watex.methods.hydro import MXS
>>> hdata= load_hlogs (as_frame =True)
>>> # drop the 'remark' columns since there is no valid data
>>> hdata.drop (columns ='remark', inplace =True)
>>> mxs = MXS (kname ='k').fit(hdata)
>>> # predict the default NGA
>>> mxs.predictNGA() # default prediction with n_groups =3
>>> # make MXS labels using the default 'k' categorization
>>> ymxs=mxs.makeyMXS(categorize_k=True, default_func=True)
>>> mxs.yNGA_ [62:74]
Out[43]: array([1, 2, 2, 2, 3, 1, 2, 1, 2, 2, 1, 2])
>>> ymxs[62:74]
Out[44]: array([ 1, 22, 22, 22, 3, 1, 22, 1, 22, 22, 1, 22])
>>> # to get the label similarity , need to provide the
>>> # the column name of aquifer group and fit again like
>>> mxs = MXS (kname ='k', aqname ='aquifer_group').fit(hdata)
>>> sim = mxs.labelSimilarity()
>>> sim
Out[47]: [(0, 'II')] # group II and label 0 are very similar
""".format(params=_param_docs)
class Logging:
    """
    Logging class

    Only deal with numerical values. If categorical values are find in the
    logging dataset, they should be discarded.

    Parameters
    ----------
    zname: str, default='depth' or 'None'
        The name of the depth column in `data`. If the name 'depth' is not
        specified as the main depth columns, an other name in the columns
        that matches the depth can also be indicated so the function will put
        aside this columm as depth column for plot purpose. If set to
        ``None``, `zname` holds the name ``depth`` and assumes that depth
        exists in `data` columns.
    kname: str, int
        Name of permeability coefficient columns. `kname` allows to retrieve
        the permeability coefficient 'k' in a specific dataframe. If integer
        is passed, it assumes the index of the dataframe fits the 'k'
        columns. Note that integer value must not be out the dataframe size
        along axis 1. Commonly `kname` needs to be supplied when a dataframe
        is passed as a positional or keyword argument.
    verbose: int, default=0
        Verbosity level; forwarded to
        :func:`~.watex.utils.funcutils.to_numeric_dtypes` when fitting.

    Examples
    --------
    >>> from watex.datasets import load_hlogs
    >>> from watex.methods.hydro import Logging
    >>> # get the logging data
    >>> h = load_hlogs ()
    >>> h.feature_names
    Out[29]:
    ['hole_id',
     'depth_top',
     'depth_bottom',
     'strata_name',
     'rock_name',
     'layer_thickness',
     'resistivity',
     'gamma_gamma',
     'natural_gamma',
     'sp',
     'short_distance_gamma',
     'well_diameter']
    >>> # we can fit to collect the valid logging data
    >>> log= Logging(kname ='k', zname='depth_top' ).fit(h.frame[h.feature_names])
    >>> log.feature_names_in_ # categorical features should be discarded.
    Out[33]:
    ['depth_top',
     'depth_bottom',
     'layer_thickness',
     'resistivity',
     'gamma_gamma',
     'natural_gamma',
     'sp',
     'short_distance_gamma',
     'well_diameter']
    >>> log.plot ()
    Out[34]: Logging(zname= depth_top, kname= k, verbose= 0)
    >>> # plot log including the target y
    >>> log.plot (y = h.frame.k , posiy =0 )# first position
    Logging(zname= depth_top, kname= k, verbose= 0)
    """

    def __init__(
        self,
        zname=None,
        kname=None,
        verbose=0
    ):
        self._logging = watexlog.get_watex_logger(self.__class__.__name__)
        self.zname = zname
        self.kname = kname
        self.verbose = verbose

    def fit(self, data, **fit_params) -> "Logging":
        """
        Fit logging data and populate attributes.

        Parameters
        ----------
        data : Dataframe of shape (n_samples, n_features)
            where `n_samples` is the number of data, expected to be the data
            collected at different depths and `n_features` is the number of
            columns (features) that supposed to be plot.
            Note that `X` must include the ``depth`` columns. If not given a
            relative depth should be created according to the number of
            samples that composes `data`.
        fit_params: dict,
            Additional keyword arguments passed to
            :func:`~.watex.utils.funcutils.to_numeric_dtypes`.

        Returns
        -------
        self: object instanciated for chaining methods. After fitting,
            ``data_`` and ``feature_names_in_`` are set.
        """
        data = check_array(
            data,
            force_all_finite="allow-nan",
            dtype=object,
            input_name="data",
            to_frame=True,
        )
        # Categorical features are dropped (``pop_cat_features=True``).
        self.data_ = to_numeric_dtypes(
            data,
            pop_cat_features=True,
            verbose=self.verbose,
            **fit_params
        )
        self.feature_names_in_ = list(self.data_)
        return self

    def plot(
        self,
        normalize=False,
        impute_nan=True,
        log10=False,
        posiy=None,
        fill_value=None,
        **plot_kws
    ):
        """ Plot the logging data.

        Parameters
        ----------
        normalize: bool, default = False
            Normalize all the data to be range between (0, 1) except the
            `depth`,
        impute_nan: bool, default=True,
            Replace the NaN values in the dataframe. Note that the default
            behaviour for replacing NaN is the ``mean``. However if the
            argument of `fill_value` is provided,the latter should be used
            to replace 'NaN' in `X`.
        log10: bool, default=False
            Convert values to log10. This can be usefull when using the
            logarithm data. However, it seems not all the data can be used
            this operation, for instance, a negative data. In that case,
            `column_to_skip` argument is usefull to provide so to skip that
            columns when converting values to log10.
        fill_value : str or numerical value, optional
            When strategy == "constant", fill_value is used to replace all
            occurrences of missing_values.
            If left to the default, fill_value will be 0 when imputing
            numerical data and "missing_value" for strings or object data
            types. If not given and `impute_nan` is ``True``, the mean
            strategy is used instead.
        posiy: int, optional
            the position to place the target plot `y` . By default the
            target plot if given is located at the last position behind the
            logging plots.
        plot_kws: dict,
            Additional keyword arguments passed to
            ``watex.utils.plotutils.plot_logging``.

        Returns
        -------
        self: the instance itself, for chaining methods.

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called yet.
        """
        self.inspect
        # Imported lazily, as in the original implementation.
        from ..utils.plotutils import plot_logging
        plot_logging(
            self.data_,
            tname=self.kname,
            zname=self.zname,
            normalize=normalize,
            impute_nan=impute_nan,
            log10=log10,
            posiy=posiy,
            fill_value=fill_value,
            **plot_kws
        )
        return self

    def __repr__(self):
        """ Pretty format for programmer guidance following the API... """
        return repr_callable_obj(self)

    def __getattr__(self, name):
        # Only reached when the regular attribute lookup fails; the
        # module-level helper raises AttributeError with a hint.
        _getattr_(self, name)

    @property
    def inspect(self):
        """ Inspect object whether is fitted or not.

        Returns 1 when the attribute ``data_`` exists, otherwise raises
        :class:`NotFittedError`.
        """
        msg = ("{obj.__class__.__name__} instance is not fitted yet."
               " Call 'fit' with appropriate arguments before using"
               " this method"
               )
        if not hasattr(self, 'data_'):
            raise NotFittedError(msg.format(obj=self))
        return 1
class AqGroup(HData):
    # The class docstring is assigned to ``AqGroup.__doc__`` after the
    # class body (it interpolates shared parameter descriptions).

    def __init__(
        self,
        kname=None,
        aqname=None,
        method="naive",
        keep_label_0=False,
        **kws
    ):
        super().__init__(
            kname=kname,
            aqname=aqname,
            **kws
        )
        self.method = method
        self.keep_label_0 = keep_label_0

    def findGroups(
        self,
        method="naive",
        default_arr=None,
        **g_kws
    ):
        """ Find the existing group between the permeability coefficient `k`
        and the group of aquifer.

        It computes the occurence between the true labels
        and the group of aquifer as a function of occurence and
        repesentativity.

        Parameters
        ----------
        method: str ['naive', 'strict'], default='naive'
            The kind of strategy to compute the representativity of a label
            in the predicted array 'y_pred'. It can also be 'strict'. Indeed:

            - ``naive`` computes the importance of the label by the number of
              its occurence for this specific label in the array 'y_true'.
              It does not take into account of the occurence of other
              existing labels. This is usefull for unbalanced class labels
              in `y_true`.
            - ``strict`` computes the importance of the label by the number
              of occurence in the whole valid `y_true` i.e. under the total
              of occurence of all the labels that exist in the whole
              'arra_aq'. This can give a suitable anaylse results if the
              data is not unbalanced for each labels in `y_pred`.

        default_arr: optional
            Accepted for API compatibility; see the note below.
        g_kws: dict,
            Additional keyword arguments passed to
            :func:`~watex.utils.hydroutils.find_aquifer_groups`.

        Returns
        -------
        g: _Group: :class:`~.box._Group` class object
            Use attribute `.groups` to find the group values.

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called yet.
        kError
            If ``k_`` is None.
        AquiferGroupError
            If ``aq_`` is None.

        Notes
        -----
        Only the `method` argument of this call is forwarded. The
        constructor values ``self.method`` and ``self.keep_label_0`` and
        the `default_arr` argument are currently not passed to
        ``find_aquifer_groups``; supply ``keep_label_0`` through `g_kws` if
        it is needed.
        """
        self.inspect
        msg = ("{0!r} data for {1} cannot be None. Specify the name of the "
               "column {2!r} that fits the {1} values in the hydro-log dataset."
               )
        if self.k_ is None:
            raise kError(
                msg.format("k", "permeability coefficient", "kname"))
        if self.aq_ is None:
            raise AquiferGroupError(
                msg.format("aq", "aquifer groups", "aqname"))

        # NOTE(review): ``default_arr``, ``self.method`` and
        # ``self.keep_label_0`` are unused here -- confirm whether they
        # should be forwarded to the helper.
        g = find_aquifer_groups(
            self.k_,
            self.aq_,
            kname=self.kname,
            aqname=self.aqname,
            method=method,
            **g_kws
        )
        return g
# Docstring built after the class body so that the shared parameter
# descriptions from ``_param_docs`` can be interpolated. Literal braces in
# the example output are doubled because the text goes through ``format``.
AqGroup.__doc__ = """\
Group of Aquifer is mostly related to area information after multiple
boreholes collected.

However when predicted 'k' with a missing k-values using the Mixture
Learning Strategy (MXS), we intend to solve this problem by creating
a Naive Group of Aquifer (NGA) to compensate the missing k-values in the
dataset. This could be a good idea to avoid introducing a lot of bias since
the group of aquifer is mostly tied to the permeability coefficient 'k'.
To do this, an unsupervised learning is used to predict the NGA labels then
the NGA labels are used in turn to fill the missing k-values. The best
strategy for operating this trick is to seek for some importances between
the true k-values with their corresponding aquifer groups at each depth,
and find the most representative group. Once the most representative group
is found for each true label 'k', the group of aquifer can be renamed as
the naive similarity with the true k-label. For instance if true k-value
is the label 1 and label 1 is most representative with the group of aquifer
'IV', therefore this group can be replaced throughout the column
with 'k1'+'IV' => i.e. 'k14'. This becomes a new label created and is used to
fill the true label 'y_true' to become a MXS target ( include NGA label).
Note that the true label with valid 'k-value' remained intact and unchanged.
The same process is done for label 2, 3 and so on. The selection of MXS
label from NGA strongly depends on its preponderance or importance rate in
the whole dataset.

The following example is the demonstration to how to compute the group
representativity in datasets.

Parameters
----------
{params.core.kname}
{params.base.aqname}
g:dict,
    Dictionary compose of occurrence between the true labels
    and the group of aquifer as a function of occurrence and
    representativity

Example
-------
>>> from watex.datasets import load_hlogs
>>> from watex.methods.hydro import AqGroup
>>> hdata = load_hlogs ().frame
>>> hg = AqGroup (kname ='k', aqname='aquifer_group').fit(hdata )
>>> hg.findGroups ()
Out[25]:
_Group(Label=[' 0 ',
             Preponderance( rate = ' 100.0 %',
                           [('Groups', {{'II': 1.0}}),
                            ('Representativity', ( 'II', 1.0)),
                            ('Similarity', 'II')])],
       )
""".format(params=_param_docs)
#XXX TODO
class Hydrogeology(ABC):
    """
    A branch of geology concerned with the occurrence, use, and functions of
    surface water and groundwater.

    Hydrogeology is the study of groundwater – it is sometimes referred to as
    geohydrology or groundwater hydrology. Hydrogeology deals with how water
    gets into the ground (recharge), how it flows in the subsurface
    (through aquifers) and how groundwater interacts with the surrounding
    soil and rock (the geology).

    Indeed, hydrogeologists apply this knowledge to many practical uses.
    They might:

    * Design and construct water wells for drinking water supply, irrigation
      schemes and other purposes;
    * Try to discover how much water is available to sustain water supplies
      so that these do not adversely affect the environment – for example,
      by depleting natural baseflows to rivers and important wetland
      ecosystems;
    * Investigate the quality of the water to ensure that it is fit for its
      intended use;
    * Where the groundwater is polluted, they design schemes to try and
      clean up this pollution;
    * Design construction dewatering schemes and deal with groundwater
      problems associated with mining; Help to harness geothermal energy
      through groundwater-based heat pumps.

    This class is abstract: ``__init__`` must be overridden by subclasses.
    """

    @abstractmethod
    def __init__(self, **kwd):
        # ``abstractmethod`` (not ``abstractclassmethod``) keeps ``__init__``
        # a regular instance method, so a subclass calling
        # ``super().__init__()`` attaches the logger to the instance instead
        # of to the class.
        self._logging = watexlog.get_watex_logger(self.__class__.__name__)
#xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
def _getattr_(self, name):
    """ Isolated part of __getattr__ to reformat the attribute getter.

    Always raises :class:`AttributeError`. The message ends with a hint:
    for ``'yNGA_'`` it points to the ``predictNGA`` method; otherwise, when
    ``smart_strobj_recognition`` returns something other than None for
    `name` among the instance attributes, that value is suggested.
    """
    suggestion = smart_strobj_recognition(name, self.__dict__, deep=True)

    if name == 'yNGA_':
        hint = ". Call 'predictNGA' method to fetch attribute 'yNGA_'"
    elif suggestion is None:
        hint = ''
    else:
        hint = f'. Do you mean {suggestion!r}' + '?'

    owner = self.__class__.__name__
    raise AttributeError(
        f'{owner!r} object has no attribute {name!r}'
        f'{hint}'
    )