Source code for watex.utils.box

# -*- coding: utf-8 -*-
#   License: BSD-3-Clause
#   Author: LKouadio <etanoyau@gmail.com>
"""
Created on Thu Oct 13 14:52:26 2022

@author: Daniel
"""
[docs]class Boxspace(dict): """Is a container object exposing keys as attributes. BowlSpace objects are sometimes used as an output for functions and methods. They extend dictionaries by enabling values to be accessed by key, `Boxspace["value_key"]`, or by an attribute, `Boxspace.value_key`. Another option is to use Namespace of collection modules as: >>> from collections import namedtuple >>> Boxspace = namedtuple ('Boxspace', [< attribute names >] ) However the explicit class that inhers from build-in dict is easy to handle attributes and to avoid multiple error where the given name in the `names` attributes does not match the expected attributes to fetch. Examples -------- >>> from watex.utils.box import Boxspace >>> bs = Boxspace(pkg='watex', objective ='give water', version ='0.1.dev') >>> bs['pkg'] ... 'watex' >>> bs.pkg ... 'watex' >>> bs.objective ... 'give water' >>> bs.version ... '0.1.dev' """ def __init__(self, **kws): super().__init__(kws) def __setattr__(self, key, value): self[key] = value def __dir__(self): return self.keys() def __getattr__(self, key): try: return self[key] except KeyError: raise AttributeError(key) def __setstate__(self, state): # Overriding __setstate__ to be a noop has the effect of # ignoring the pickled __dict__ pass
class _Group: """ Group of Aquifer is mostly related to area information after multiple boreholes collected. However when predicted 'k' with a missing k-values using the Mixture Learning Strategy (MXS), we intend to solve this problem by creating a Naive Group of Aquifer (NGA) to compensate the missing k-values in the dataset. This could be a good idea to avoid introducing a lot of bias since the group of aquifer is mostly tied to the permeability coefficient 'k'. To do this, an unsupervised learning is used to predict the NGA labels then the NGA labels are used in turn to fill the missing k-values. The best strategy for operting this trick is to seek for some importances between the true k-values with their corresponding aquifer groups at each depth, and find the most representative group. Once the most representative group is found for each true label 'k', the group of aquifer can be renamed as the naive similarity with the true k-label. For instance if true k-value is the label 1 and label 1 is most representative with the group of aquifer 'IV', therefore this group can be replaced throughout the column with 'k1'+'IV=> i.e. 'k14'. This becomes a new label created and is used to fill the true label 'y_true' to become a MXS target ( include NGA label). Note that the true label with valid 'k-value' remained intach and unchanged. The same process is done for label 2, 3 and so on. The selection of MXS label from NGA strongly depends on its preponderance or importance rate in the whole dataset. The following example is the demonstration to how to compute the group representativity in datasets. Parameters ---------- g:dict, Dictionnary compose of occurence between the true labels and the group of aquifer as a function of occurence and repesentativity Example -------- >>> from watex.utils import naive_imputer, read_data , reshape >>> from watex.datasets import load_hlogs >>> from watex.utils.hydroutils import classify_k, find_aquifer_groups >>> b= load_hlogs () #just taking the target names >>> data = read_data ('data/boreholes/hf.csv') # read complete data >>> y = data [b.target_names] >>> # impute the missing values found in aquifer group columns >>> # reshape 1d array along axis 0 for imputation >>> agroup_imputed = naive_imputer ( reshape (y.aquifer_group, axis =0 ) , strategy ='most_frequent') >>> # reshape back to array_like 1d >>> y.aquifer_group =reshape (agroup_imputed) >>> # categorize the 'k' continous value in 'y.k' using the default >>> # 'k' mapping func >>> y.k = classify_k (y.k , default_func =True) >>> # get the group obj >>> group_obj = find_aquifer_groups(y.k, y.aquifer_group, ) >>> group_obj ... _Group(Label=[' 1 ', Preponderance( rate = '53.141 %', [('Groups', {'V': 0.32, 'IV': 0.266, 'II': 0.236, 'III': 0.158, 'IV&V': 0.01, 'II&III': 0.005, 'III&IV': 0.005}), ('Representativity', ( 'V', 0.32)), ('Similarity', 'V')])], Label=[' 2 ', Preponderance( rate = ' 19.11 %', [('Groups', {'III': 0.274, 'II': 0.26, 'V': 0.26, 'IV': 0.178, 'III&IV': 0.027}), ('Representativity', ( 'III', 0.27)), ('Similarity', 'III')])], Label=[' 3 ', Preponderance( rate = '27.749 %', [('Groups', {'V': 0.443, 'IV': 0.311, 'III': 0.245}), ('Representativity', ( 'V', 0.44)), ('Similarity', 'V')])], ) """ def __init__ (self, g=None, / ): self.g_ = g @property def g(self): return self.g_ @property def similarity (self): """return label similarities with NGA labels """ return ( (label, list(rep_val [1])[0] ) for label, rep_val in self.g_.items() ) @property def preponderance (self): """ Returns label occurences in the datasets """ return ( (label, rep_val[0]) for label, rep_val in self.g_.items() ) @property def representativity (self): """ Returns the representativity of each labels""" return ( (label, round(rep_val[1].get(list(rep_val [1])[0]), 2)) for label, rep_val in self.g_.items() ) @property def groups (self): """Return groups for each label """ return ((label, {k: v for k, v in repr_val[1].items()}) for label, repr_val in self.g_.items () ) def __repr__ (self ) : return self.__class__.__name__ + "(" + self._format ( self.g) + "{:>13}".format(")") def _format (self, gdict): """ Format representativity of Aquifer groups Parameters ---------- gdict: dict, Dictionnary compose of occurence of the group as a function of aquifer group repesentativity """ ag=[] for k, (label, repr_val ) in enumerate ( gdict.items() ): prep , g = repr_val ag+=["{:5}=['{:^3}', \n".format( "Label" if k==0 else "{:>17}".format("Label"), label ) ] ag +=["{:>32}( rate = '{:^7} %', \n".format( "Preponderance", round (prep *100, 3 ) )] ag += ["{:>34}'Groups', {}),\n".format("[(", # str({ k: "{:>5}".format(round (v, 3)) for k , v in g.items()}) str({ k: round (v, 3) for k , v in g.items()}) ) ] ag +=["{:>34}'Representativity', ( '{}', {})),\n".format("(", list(g)[0], round ( g.get(list(g)[0]), 2)) ] ag += ["{:>34}'Similarity', '{}')])],\n ".format("(", list(g)[0] ) ] # ag+=['{:>30}'.format("])],\n ")] #ag+=["{:>7}".format(")")] return ''.join (ag)