# Source code for watex.utils.plotutils

# -*- coding: utf-8 -*-
#   License: BSD-3-Clause
#   Author: LKouadio <etanoyau@gmail.com>
"""
Additional plot utilities. 
"""
from __future__ import annotations 
import os
import re 
import copy 
import datetime 
import warnings
import itertools 
import numpy as np
import pandas as pd 
import matplotlib as mpl 
from matplotlib.patches import Ellipse
import matplotlib.colors as mcolors
import matplotlib.transforms as transforms 
from matplotlib import gridspec 
import seaborn as sns 
from scipy.cluster.hierarchy import ( 
    dendrogram, ward 
    )
import scipy.sparse as sp
import matplotlib.pyplot as plt

from ..exceptions import ( 
    TipError, 
    PlotError, 
    )
from .funcutils import  ( 
    _assert_all_types,
    is_iterable, 
    to_numeric_dtypes, 
    make_obj_consistent_if, 
    str2columns, 
    is_in_if, 
    is_depth_in, 
    reshape, 
    )
from .validator import  ( 
    _check_array_in  , 
    _is_cross_validated,
    assert_xy_in, 
    get_estimator_name,
    check_array, 
    check_X_y,
    check_y,
    check_consistent_length, 
    check_is_fitted , 
    _assert_z_or_edi_objs, 
    )
from ._dependency import import_optional_dependency 
from ..decorators import nullify_output
try: 
    from ..exlib.sklearn import ( 
        learning_curve ,   
        confusion_matrix, 
        RandomForestClassifier, 
        LogisticRegression, 
        MinMaxScaler, 
        SimpleImputer, 
        KMeans, 
        silhouette_samples, 
        roc_curve, 
        roc_auc_score, 
        ) 
except : pass 
 
try : 
    from yellowbrick.classifier import ConfusionMatrix 
except: pass 

# Module-level pool of Matplotlib color specifications: named colors
# first, followed by RGB tuples.
D_COLORS = [
    'g', 'gray', 'y', 'blue', 'orange', 'purple', 'lime', 'k', 'cyan',
    (.6, .6, .6), (0, .6, .3), (.9, 0, .8), (.8, .2, .8), (.0, .9, .4),
]

# Module-level pool of Matplotlib marker codes ('o' appears twice).
D_MARKERS = ['o', '^', 'x', 'D', '8', '*', 'h', 'p', '>', 'o', 'd', 'H']

# Module-level pool of Matplotlib line styles: short codes, the three
# "draw nothing" spellings, then the long names ('-' appears twice).
D_STYLES = [
    '-', '-', '--', '-.', ':',
    'None', ' ', '',
    'solid', 'dashed', 'dashdot', 'dotted',
]
#----


def plot_logging(
    X,
    y=None,
    zname=None,
    tname=None,
    labels=None,
    impute_nan=True,
    normalize=False,
    log10=False,
    columns_to_skip=None,
    pattern=None,
    strategy='mean',
    posiy=None,
    fill_value=None,
    fig_size=(16, 7),
    fig_dpi=300,
    colors=None,
    cs4_colors=False,
    sns_style=False,
    savefig=None,
    draw_spines=False,
    seed=None,
    verbose=0,
    **kws
):
    r""" Plot logging data.

    Each numerical column of `X` is drawn in its own panel against the
    depth, all panels sharing the (inverted) depth axis. Categorical
    columns are discarded before plotting.

    Parameters
    -----------
    X : DataFrame, Series or ndarray of shape (n_samples, n_features)
        Logging data where `n_samples` is the number of measurements,
        expected to be collected at different depths, and `n_features`
        is the number of columns to plot. The depth column is looked up
        by name (see `zname`) and removed from the plotted features.

    y : array-like or Series of shape (n_samples,), optional
        Target relative to X. If given, the target is plotted too; by
        default it is placed by the target-toggling helper and its
        position can be changed with `posiy`. Its top spine is colored
        in red to tell it apart from the features.

    zname : str, optional
        Name of the depth column in `X`. When ``None``, ``'depth'`` is
        used.

    tname : str, optional
        Name of the target. Forwarded to the target-extraction helper
        together with `y`.

    labels : list or str, optional
        New column labels, forwarded to the depth-extraction helper as
        its ``columns`` argument.

    impute_nan : bool, default=True
        Replace the NaN values found in the data. `strategy` is used
        unless `fill_value` is provided, in which case every NaN is
        replaced by `fill_value`.

    normalize : bool, default=False
        Scale every plotted column to the range (0, 1) with a
        ``MinMaxScaler``. The depth is not scaled.

    log10 : bool, default=False
        Convert values to log10. Columns listed in `columns_to_skip`
        are left untouched, which is useful for columns holding
        negative values.

    columns_to_skip : list or str, optional
        Columns to skip when applying the 'log10' operation. It can be
        given as a literal string, e.g.
        ``'depth_top, thickness, sp, gamma_gamma'``; `pattern` is then
        used to split it into column names.

    pattern : str, optional
        Regex pattern used to parse `columns_to_skip` when it is given
        as a literal string. It is forwarded as-is to the log10 helper;
        the upstream documentation quotes ``'[#&*@!,;\s]\s*'`` as the
        pattern applied when none is given.

    strategy : str, default='mean'
        The imputation strategy, as understood by ``SimpleImputer``:
        "mean", "median", "most_frequent" or "constant". Ignored when
        `fill_value` is provided.

    posiy : int, optional
        The position to place the target plot `y`.

    fill_value : str or numerical value, optional
        Value used to replace all the missing values. When provided
        (``0`` included), the "constant" strategy is used whatever
        `strategy` holds.

    fig_size : tuple (width, height), default=(16, 7)
        The matplotlib figure size.

    fig_dpi : float or 'figure', default=300
        The resolution in dots per inch used when saving the figure.

    colors : str, list of Matplotlib colors, optional
        The colors for plotting each column of `X`. Forwarded to
        :func:`make_plot_colors`, so ``'cs4'``, ``'xkcd'`` and the
        ``'cs4:4'``-like forms are accepted.

    cs4_colors : bool, default=False
        Accepted for API compatibility; not used by this function.

    sns_style : str or bool, default=False
        Accepted for API compatibility; not used by this function.

    savefig : str, optional
        The path to save the figure. When given, the figure is saved
        and closed instead of being shown.

    draw_spines : bool or tuple (-lim, +lim), default=False
        Only draw the left spine between two bounds. A tuple gives the
        lower and upper bounds explicitly; ``True`` derives the bounds
        from the y-ticks of the first panel.

    seed : int, optional
        Forwarded to :func:`make_plot_colors` to reproduce randomly
        picked CS4/XKCD colors.

    verbose : int, default=0
        Forwarded to the numeric-conversion helper, which reports the
        categorical features dropped.

    kws : dict
        Additional keyword arguments passed to
        :meth:`matplotlib.axes.Axes.plot`.

    Raises
    ------
    TypeError
        If `y` is given and is not iterable.
    ValueError
        If `y` and `X` do not have the same length.

    Examples
    ---------
    >>> from watex.datasets import load_hlogs
    >>> from watex.utils.plotutils import plot_logging
    >>> X0, y = load_hlogs (as_frame =True) # get the frames
    >>> # plot the default logging with normalize =True
    >>> plot_logging (X0, normalize =True)
    >>> # Include the target in the plot
    >>> plot_logging ( X0, y = y.kp , posiy = 0,
    ...               columns_to_skip=['thickness', 'sp'],
    ...               log10 =True,
    ...               )
    >>> # draw spines and limit plot from (0, 700) m depth
    >>> plot_logging (X0 , y= y.kp, draw_spines =(0, 700) )
    """
    X = _assert_all_types(X, pd.DataFrame, pd.Series, np.ndarray)
    X = check_array(
        X,
        dtype=object,
        force_all_finite="allow-nan",
        input_name="Logging dataset",
        to_frame=True,
    )
    # Discard all categorical values and keep only the numerical
    # features.
    X = to_numeric_dtypes(X, pop_cat_features=True, verbose=verbose)

    if y is not None:
        if isinstance(y, (list, tuple)):
            y = np.array(y)
        if not is_iterable(y):
            raise TypeError("y expects an iterable object."
                            f" got {type(y).__name__!r}")
        y = _assert_all_types(y, pd.Series, pd.DataFrame, np.ndarray)
        y = check_y(y, to_frame=True, allow_nan=True)
        if len(y) != len(X):
            raise ValueError(
                "y and X sizes along axis 0 must be consistent;"
                f" {len(y)} and {len(X)} are given.")

    # Split the depth from the features, then fetch the target if any.
    X, depth = is_depth_in(X, zname or 'depth', columns=labels)
    X, y = _is_target_in(X, y=y, tname=tname)

    if log10:
        X = _skip_log10_columns(
            X, column2skip=columns_to_skip, pattern=pattern,
            inplace=False)

    if normalize:
        msc = MinMaxScaler()
        Xsc = msc.fit_transform(X)
        # Rebuild a frame: the scaler returns a bare array.
        if hasattr(msc, 'feature_names_in_'):
            X = pd.DataFrame(Xsc, columns=list(msc.feature_names_in_))
        else:
            X = pd.DataFrame(Xsc, columns=list(X.columns))

    if impute_nan and X.isnull().values.any():
        # A provided fill_value (0 included) forces the 'constant'
        # strategy; SimpleImputer rejects ``strategy=None``.
        imputer = SimpleImputer(
            strategy='constant' if fill_value is not None else strategy,
            fill_value=fill_value,
        )
        X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

    if y is not None:
        X = _toggle_target_in(X, y, pos=posiy)

    colors = make_plot_colors(
        X, colors=colors, axis=1, seed=seed, chunk=False)

    # squeeze=False always yields a 2D array of axes, so a single
    # column can be plotted like any other number of columns.
    fig, axes = plt.subplots(
        1, ncols=X.shape[1], sharey=True, figsize=fig_size,
        squeeze=False)
    axes = axes.ravel()

    spine_bounds = None  # resolved once, on the first panel
    for k, panel in enumerate(axes):
        panel.plot(X.iloc[:, k], depth, color=colors[k], **kws)
        # Only show ticks and labels on the top and left sides.
        panel.tick_params(
            top=True, labeltop=True, bottom=False, labelbottom=False)
        panel.set_title(X.columns[k])
        panel.spines['right'].set_visible(False)
        panel.spines['bottom'].set_visible(False)
        panel.xaxis.set_ticks_position('top')

        if y is not None and X.columns[k] == y.name:
            # Red top spine to tell the target from the features.
            panel.spines['top'].set_color('red')

        if draw_spines:
            if spine_bounds is None:
                if is_iterable(draw_spines):
                    spine_bounds = sorted(
                        float(v) for v in draw_spines[:2])
                    if len(spine_bounds) < 2:
                        warnings.warn(
                            "Spine bounds is a tuple of (startpoint, endpoint)"
                            " Single limit value is not allowed."
                        )
                else:
                    # Only ``True`` is given: matplotlib extends the
                    # axis past the data, so step half a tick spacing
                    # inwards from the first and last ticks.
                    ytv = axes[0].get_yticks()
                    spacing = (ytv[-1] - ytv[0]) / (len(ytv) - 1)
                    spine_bounds = (
                        ytv[0] + spacing / 2, ytv[-1] - spacing / 2)
            panel.spines['left'].set_bounds(*spine_bounds)

    axes[0].set_ylabel("Depth (m)")
    # The y-axis is shared: inverting the current one inverts them all.
    plt.gca().invert_yaxis()

    if savefig is not None:
        plt.savefig(savefig, dpi=fig_dpi)
        plt.close()
    else:
        plt.show()
def make_plot_colors(
    d,
    /,
    colors: str | list[str] = None,
    axis: int = 0,
    seed: int = None,
    chunk: bool = ...,
):
    """ Select colors according to the data size along axis.

    Parameters
    ----------
    d : array-like
        Array data whose size along `axis` fixes how many colors are
        needed.

    colors : str, list of Matplotlib colors, optional
        User colors; the default colors are always appended after
        them. If not given, only the default colors are used. A string
        containing ``'cs4'`` or ``'xkcd'`` selects
        ``matplotlib.colors.CSS4_COLORS`` or
        ``matplotlib.colors.XKCD_COLORS`` instead:

        - with an integer suffix (``'cs4:4'``, ``'xkcd:4'``) the
          palette is used in its own order starting at that index;
        - with a color-name suffix (``'cs4:azure'``) the palette is
          used starting at that color;
        - otherwise colors are drawn at random from the palette; use
          `seed` to reproduce the draw.

        The caller's list is never modified.

    axis : int or str, default=0
        Axis along which the colors must be generated. Any value whose
        lowercase string form is found in ``'columns1'`` (e.g. ``1``
        or ``'columns'``) selects the columns; anything else selects
        the rows.

    seed : int, optional
        Seed passed to ``numpy.random.seed`` before the random draw of
        CS4/XKCD colors. Note that this reseeds NumPy's global
        generator.

    chunk : bool, default=True
        Chunk the generated colors to fit the exact length of `d`
        along `axis`. ``False`` returns every generated color.

    Returns
    -------
    colors : list
        List of the generated colors.

    Examples
    --------
    >>> import numpy as np
    >>> from watex.utils.plotutils import make_plot_colors
    >>> ar = np.random.randn (7, 2)
    >>> make_plot_colors (ar )
    ['g', 'gray', 'y', 'blue', 'orange', 'purple', 'lime']
    >>> make_plot_colors (ar , axis =1 )
    ['g', 'gray']
    >>> make_plot_colors (ar , axis =1 , colors ='cs4:4')
    ['#F0FFFF', '#F5F5DC']
    >>> # random CS4 colors, reproducible thanks to the seed
    >>> c = make_plot_colors (ar , axis =1 , colors ='cs4', seed =42)
    """
    # Colors match either the row axis or the columns axis.
    axis = 1 if 'columns1'.find(str(axis).lower()) >= 0 else 0

    d = is_iterable(d, exclude_string=True, transform=True)
    if not hasattr(d, '__array__'):
        d = np.array(d, dtype=object)

    axis_length = len(d) if len(d.shape) == 1 else d.shape[axis]
    m_cs = make_mpl_properties(axis_length)

    # Identity test: ``==`` against Ellipsis is ambiguous for arrays.
    if colors is ...:
        colors = None

    if isinstance(colors, str) and (
            'cs4' in colors.lower() or 'xkcd' in colors.lower()):
        spec = colors.lower()
        if 'cs4' in spec:
            palette = mcolors.CSS4_COLORS
        else:
            # Strip the 'xkcd:' prefix so names can be looked up bare.
            palette = {
                name.replace('xkcd:', ''): value
                for name, value in mcolors.XKCD_COLORS.items()
            }
        key_colors = list(palette)
        colors = list(palette.values())

        # Shuffle unless the suffix pins a start index or color name.
        shuffle_cs4 = True
        cs4_start = 0
        if ':' in spec:
            suffix = spec.split(':')[-1]
            try:
                cs4_start = int(suffix)
            except ValueError:
                if suffix in key_colors:
                    cs4_start = key_colors.index(suffix)
                    shuffle_cs4 = False
            else:
                shuffle_cs4 = False

        if shuffle_cs4:
            np.random.seed(seed)
            colors = list(np.random.choice(colors, len(m_cs)))
        else:
            if cs4_start > len(colors) - 1:
                cs4_start = 0
            colors = colors[cs4_start:]

    if colors is not None:
        # A lone color (a plain string included) becomes a one-item
        # list; build a new list so the caller's object is untouched.
        if isinstance(colors, str) or not is_iterable(colors):
            colors = [colors]
        colors = list(colors) + list(m_cs)
    else:
        colors = m_cs

    # Ellipsis means "not given" and defaults to True; an explicit
    # value is honored as passed.
    chunk = True if chunk is ... else bool(chunk)
    return colors[:axis_length] if chunk else colors
def plot_silhouette(X, labels, metric='euclidean', savefig=None, **kwds):
    r"""Plot the silhouette coefficients to quantify clustering quality.

    One horizontal band of sorted silhouette values is drawn per
    cluster, and the mean silhouette over all samples is shown as a red
    dashed vertical line.

    Parameters
    ---------
    X : array-like of shape (n_samples_a, n_samples_a) if
        metric == "precomputed" or (n_samples_a, n_features) otherwise
        An array of pairwise distances between samples, or a feature
        array.

    labels : array-like of shape (n_samples,)
        Label values for each sample. The tick labels shown are the
        unique labels plus one, so numeric labels are expected.

    metric : str or callable, default='euclidean'
        The metric forwarded to ``silhouette_samples``. If ``X`` is the
        distance array itself, use "precomputed" as the metric.

    savefig : str, default=None
        The path to save the figure. When given, the figure is saved
        with a resolution of 300 dpi and closed; otherwise it is shown.

    **kwds : optional keyword parameters
        Any further parameters are passed directly to
        ``silhouette_samples``.

    See Also
    --------
    watex.view.mlplot.plotSilhouette:
        Gives consistency plot as the use of `prefit` parameter which
        checks whether `labels` are expected to be passed into the
        function directly or not.

    Examples
    ---------
    >>> import numpy as np
    >>> from watex.exlib.sklearn import KMeans
    >>> from watex.datasets import load_iris
    >>> from watex.utils.plotutils import plot_silhouette
    >>> d= load_iris ()
    >>> X= d.data [:, 0][:, np.newaxis] # take the first axis
    >>> km= KMeans (n_clusters =3 , init='k-means++', n_init =10 ,
    ...             max_iter = 300 , tol=1e-4, random_state =0 )
    >>> y_km = km.fit_predict(X)
    >>> plot_silhouette (X, y_km)
    """
    X, labels = check_X_y(X, labels, to_frame=True)
    cluster_labels = np.unique(labels)
    n_clusters = cluster_labels.shape[0]
    silhouette_vals = silhouette_samples(
        X, labels=labels, metric=metric, **kwds)

    tick_positions = []
    band_start = 0
    for rank, cluster in enumerate(cluster_labels):
        # Boolean selection returns a copy, sorted in place below.
        band = silhouette_vals[labels == cluster]
        band.sort()
        band_stop = band_start + len(band)
        band_color = mpl.cm.jet(float(rank) / n_clusters)
        plt.barh(
            range(band_start, band_stop),
            band,
            height=1.0,
            edgecolor='none',
            color=band_color,
        )
        # Put the cluster tick in the middle of its band.
        tick_positions.append((band_start + band_stop) / 2.)
        band_start = band_stop

    plt.axvline(np.mean(silhouette_vals), color='red', linestyle='--')
    plt.yticks(tick_positions, cluster_labels + 1)
    plt.ylabel("Cluster")
    plt.xlabel("Silhouette coefficient")
    plt.tight_layout()

    if savefig is None:
        plt.show()
    else:
        plt.savefig(savefig, dpi=300)
        plt.close()
def plot_sbs_feature_selection(
    sbs_estimator,
    /,
    X=None,
    y=None,
    fig_size=(8, 5),
    sns_style=False,
    savefig=None,
    verbose=0,
    **sbs_kws
):
    """Plot Sequential Backward Selection (SBS) for feature selection.

    The score recorded for each feature subset is plotted against the
    number of features in that subset.

    Parameters
    ------------
    sbs_estimator : estimator object
        Either a fitted
        :class:`~.watex.base.SequentialBackwardSelection` (detected by
        its ``scores_`` or ``k_score_`` attribute) or a bare estimator.
        In the latter case the estimator is wrapped in a new
        ``SequentialBackwardSelection`` which is fitted on `X` and `y`.

    X : array-like of shape (n_samples, n_features), optional
        Training vector. Required when `sbs_estimator` is not fitted.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs), optional
        Target relative to X. Required when `sbs_estimator` is not
        fitted.

    fig_size : tuple (width, height), default=(8, 5)
        The matplotlib figure size.

    sns_style : str or bool, default=False
        The seaborn style; applied only when truthy.

    savefig : str, default=None
        The path to save the figure. When given, the figure is saved
        and closed; otherwise it is shown.

    verbose : int, default=0
        When truthy, print the labels of the smallest feature subset.
        Labels come from ``X.columns``, else from the column positions
        of `X`, else from the estimator's ``feature_names_in``
        attribute.

    sbs_kws : dict
        Additional keyword arguments passed to
        :class:`~.watex.base.SequentialBackwardSelection`.

    Raises
    ------
    TypeError
        If `sbs_estimator` is not fitted and `X` or `y` is missing.

    Examples
    ----------
    (1)-> Plot fitted SBS in action

    >>> from watex.exlib.sklearn import KNeighborsClassifier , train_test_split
    >>> from watex.datasets import fetch_data
    >>> from watex.base import SequentialBackwardSelection
    >>> from watex.utils.plotutils import plot_sbs_feature_selection
    >>> X, y = fetch_data('bagoue analysed') # data already standardized
    >>> Xtrain, Xt, ytrain, yt = train_test_split(X, y)
    >>> knn = KNeighborsClassifier(n_neighbors=5)
    >>> sbs= SequentialBackwardSelection (knn)
    >>> sbs.fit(Xtrain, ytrain )
    >>> plot_sbs_feature_selection(sbs, sns_style= True)

    (2)-> Plot estimator with no prefit SBS.

    >>> plot_sbs_feature_selection(knn, Xtrain, ytrain) # yield the same result
    """
    from ..base import SequentialBackwardSelection as SBS

    is_fitted = (
        hasattr(sbs_estimator, 'scores_')
        or hasattr(sbs_estimator, 'k_score_')
    )
    if not is_fitted:
        if X is None or y is None:
            clfn = get_estimator_name(sbs_estimator)
            raise TypeError(f"When {clfn} is not a fitted "
                            "estimator, X and y are needed."
                            )
        sbs_estimator = SBS(estimator=sbs_estimator, **sbs_kws)
        sbs_estimator.fit(X, y)

    k_feat = [len(k) for k in sbs_estimator.subsets_]

    if verbose:
        flabels = None
        if hasattr(X, 'columns'):
            flabels = list(X.columns)
        elif X is not None:
            warnings.warn("None columns name is detected."
                          " Created using index ")
            flabels = [f'{i:>7}' for i in range(X.shape[1])]
        elif hasattr(sbs_estimator, 'feature_names_in'):
            flabels = sbs_estimator.feature_names_in

        if flabels is not None:
            # Pick the subset holding the fewest feature indices; this
            # needs neither X nor a hard-coded position in `subsets_`.
            smallest = list(min(sbs_estimator.subsets_, key=len))
            print(f"Smallest feature for subset (k={len(smallest)}) ")
            # An array accepts a list of positions, a plain list does
            # not.
            print(np.asarray(flabels, dtype=object)[smallest])
        else:
            print("No column labels detected. Can't print the "
                  "smallest feature subset.")

    if sns_style:
        _set_sns_style(sns_style)

    plt.figure(figsize=fig_size)
    plt.plot(k_feat, sbs_estimator.scores_, marker='o')
    plt.ylim([min(sbs_estimator.scores_) - .25,
              max(sbs_estimator.scores_) + .2])
    plt.ylabel(sbs_estimator.scorer_name_)
    plt.xlabel('Number of features')
    plt.tight_layout()

    if savefig is not None:
        plt.savefig(savefig)
        plt.close()
    else:
        plt.show()
def plot_regularization_path(
    X,
    y,
    c_range=(-4., 6.),
    fig_size=(8, 5),
    sns_style=False,
    savefig=None,
    **kws
):
    r""" Plot the regularisation path from Logit / LogisticRegression.

    An L1-penalized logistic regression is fitted for every exponent
    in ``np.arange(start, stop)`` with ``C = 10 ** exponent``, and the
    weight coefficient of each feature is plotted against `C`. It is
    recommended to standardize the data first.

    Parameters
    -----------
    X : array-like of shape (n_samples, n_features)
        Training vector, where `n_samples` is the number of samples and
        `n_features` is the number of features. X is expected to be
        standardized.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Target relative to X.

    c_range : list or tuple [start, stop], default=(-4., 6.)
        Range of the base-10 exponents of `C`, from the strongest
        (start) to the weakest (stop) regularization. Values are
        sorted first; if more than two are given, a warning is issued
        and only the two smallest are kept. Note that 'C' is the
        inverse of the Logistic Regression regularization parameter
        :math:`\lambda`.

    fig_size : tuple (width, height), default=(8, 5)
        The matplotlib figure size.

    sns_style : str or bool, default=False
        The seaborn style; applied only when truthy, before the figure
        is created.

    savefig : str, default=None
        The path to save the figure. When given, the figure is saved
        with a resolution of 300 dpi and closed; otherwise it is shown.

    kws : dict
        Additional keyword arguments passed to
        :class:`sklearn.linear_model.LogisticRegression`.

    Raises
    ------
    TypeError
        If `c_range` is not iterable.
    ValueError
        If `c_range` holds fewer than two values.

    Notes
    -----
    The weights of the second class (``coef_[1]``) are plotted. For a
    binary target the model exposes a single row of coefficients,
    which is plotted instead.

    Examples
    --------
    >>> from watex.utils.plotutils import plot_regularization_path
    >>> from watex.datasets import fetch_data
    >>> X, y = fetch_data ('bagoue analysed' ) # data already standardized
    >>> plot_regularization_path (X, y )
    """
    X, y = check_X_y(X, y, to_frame=True)

    if not is_iterable(c_range):
        raise TypeError("'C' regularization strength is a range of C "
                        " Logit parameter: (start, stop).")
    c_range = sorted(c_range)
    if len(c_range) < 2:
        raise ValueError("'C' range expects two values [start, stop]")
    if len(c_range) > 2:
        warnings.warn("'C' range expects two values [start, stop]. Values"
                      f" are shrunk to the first two values: {c_range[:2]} "
                      )
        # Actually shrink: a third value would otherwise be taken as
        # the step of np.arange below.
        c_range = c_range[:2]

    weights, params = [], []
    for c in np.arange(*c_range):
        lr = LogisticRegression(
            penalty='l1', C=10. ** c, solver='liblinear',
            multi_class='ovr', **kws)
        lr.fit(X, y)
        coef = np.atleast_2d(lr.coef_)
        # Binary targets yield a single row of coefficients.
        weights.append(coef[1] if coef.shape[0] > 1 else coef[0])
        params.append(10 ** c)

    weights = np.array(weights)
    colors = make_mpl_properties(weights.shape[1])
    if not hasattr(X, 'columns'):
        flabels = [f'{i:>7}' for i in range(X.shape[1])]
    else:
        flabels = X.columns

    # Apply the style before creating the figure, as the sibling
    # plotting functions do.
    if sns_style:
        _set_sns_style(sns_style)
    fig, ax = plt.subplots(figsize=fig_size)

    for column, color in zip(range(weights.shape[1]), colors):
        plt.plot(params, weights[:, column],
                 label=flabels[column],
                 color=color)

    plt.axhline(0, color='black', ls='--', lw=3)
    plt.xlim([10 ** int(c_range[0] - 1), 10 ** int(c_range[1] - 1)])
    plt.ylabel("Weight coefficient")
    plt.xlabel('C')
    plt.xscale('log')
    # Single legend, anchored outside the axes on the right.
    ax.legend(
        loc='upper right',
        bbox_to_anchor=(1.38, 1.03),
        ncol=1,
        fancybox=True,
    )

    if savefig is not None:
        plt.savefig(savefig, dpi=300)
        plt.close()
    else:
        plt.show()
def plot_rf_feature_importances(
    clf,
    X=None,
    y=None,
    fig_size=(8, 4),
    savefig=None,
    n_estimators=500,
    verbose=0,
    sns_style=None,
    **kws
):
    """ Plot features importance with RandomForest.

    Parameters
    ----------
    clf : estimator object
        A fitted estimator exposing ``feature_importances_``. If the
        attribute is missing, a new ``RandomForestClassifier`` built
        from `n_estimators` and `kws` is fitted on `X` and `y` and used
        instead; the object passed in is not fitted.

    X : array-like of shape (n_samples, n_features), optional
        Training vector. Required when `clf` is not fitted. When it
        has a ``columns`` attribute, the column names label the bars.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs), optional
        Target relative to X. Required when `clf` is not fitted.

    fig_size : tuple (width, height), default=(8, 4)
        The matplotlib figure size.

    savefig : str, default=None
        The path to save the figure. When given, the figure is saved
        and closed; otherwise it is shown.

    n_estimators : int, default=500
        The number of trees of the forest fitted when `clf` is not
        fitted.

    verbose : int, default=0
        When truthy, print the feature labels with their importances,
        from the most to the least important.

    sns_style : str, optional
        The seaborn style; applied only when truthy.

    kws : dict
        Additional keyword arguments passed to
        :class:`sklearn.ensemble.RandomForestClassifier`.

    Raises
    ------
    TypeError
        If `clf` is not fitted and `X` or `y` is missing.

    Examples
    ---------
    >>> from watex.datasets import fetch_data
    >>> from watex.exlib.sklearn import RandomForestClassifier
    >>> from watex.utils.plotutils import plot_rf_feature_importances
    >>> X, y = fetch_data ('bagoue analysed' )
    >>> plot_rf_feature_importances (
    ...     RandomForestClassifier(), X=X, y=y , sns_style=True)
    """
    if not hasattr(clf, 'feature_importances_'):
        if X is None or y is None:
            clfn = get_estimator_name(clf)
            raise TypeError(f"When {clfn} is not a fitted "
                            "estimator, X and y are needed."
                            )
        clf = RandomForestClassifier(n_estimators=n_estimators, **kws)
        clf.fit(X, y)

    importances = np.asarray(clf.feature_importances_)
    indices = np.argsort(importances)[::-1]
    # Count features from the importances so that a fitted estimator
    # can be plotted without X.
    n_features = importances.shape[0]

    if hasattr(X, 'columns'):
        flabels = X.columns
    elif X is None and hasattr(clf, 'feature_names_in_'):
        flabels = clf.feature_names_in_
    else:
        flabels = [f'{i:>7}' for i in range(n_features)]
    # An array can be reordered with `indices`; a plain list cannot.
    flabels = np.asarray(flabels, dtype=object)

    if verbose:
        for f in range(n_features):
            print("%2d) %-*s %f" % (f + 1, 30,
                                    flabels[indices[f]],
                                    importances[indices[f]])
                  )

    if sns_style:
        _set_sns_style(sns_style)

    plt.figure(figsize=fig_size)
    plt.title("Feature importance")
    plt.bar(range(n_features),
            importances[indices],
            align='center'
            )
    plt.xticks(range(n_features), flabels[indices], rotation=90)
    plt.xlim([-1, n_features])
    plt.ylabel('Importance rate')
    plt.xlabel('Feature labels')
    plt.tight_layout()

    if savefig is not None:
        plt.savefig(savefig)
        plt.close()
    else:
        plt.show()
def plot_confusion_matrix(yt, y_pred, view=True, ax=None, annot=True, **kws):
    """ Plot a confusion matrix for a single classifier model.

    :param yt: ndarray or Series of length n
        An array or series of true target or class values. Preferably,
        the array represents the test class labels data for error
        evaluation.

    :param y_pred: ndarray or Series of length n
        An array or series of the predicted target.

    :param view: bool, default=True
        Option to display the heatmap. ``False`` mutes the plot: no
        figure is created and `ax` is left untouched.

    :param ax: matplotlib Axes, optional
        The axes to draw the heatmap on. A new figure is created when
        it is not given and `view` is ``True``.

    :param annot: bool, default=True
        Annotate the number of samples (right or wrong prediction) in
        the plot. Set ``False`` to mute the display.

    :param kws: dict,
        Additional keyword arguments passed to the function
        :func:`sklearn.metrics.confusion_matrix`.

    :returns: mat - the confusion matrix. The heatmap shows its
        transpose: true labels on the x-axis, predicted labels on the
        y-axis.

    :example:
    >>> # Import the required models and fetch an AdaBoost model
    >>> # for instance then plot the confusion metric
    >>> import matplotlib.pyplot as plt
    >>> plt.style.use ('classic')
    >>> from watex.datasets import fetch_data
    >>> from watex.exlib.sklearn import train_test_split
    >>> from watex.models import pModels
    >>> from watex.utils.plotutils import plot_confusion_matrix
    >>> # split the data . Note that fetch_data output X and y
    >>> X, Xt, y, yt = train_test_split (* fetch_data ('bagoue analysed'),
    ...                                  test_size =.25 )
    >>> # train the model with the best estimator
    >>> pmo = pModels (model ='ada' )
    >>> pmo.fit(X, y )
    >>> print(pmo.estimator_ )
    >>> # Predict the score using under the hood the best estimator
    >>> # for adaboost classifier
    >>> ypred = pmo.predict(Xt)
    >>> # now plot the score
    >>> plot_confusion_matrix (yt , ypred )
    """
    check_consistent_length(yt, y_pred)
    mat = confusion_matrix(yt, y_pred, **kws)

    if view:
        # Create the figure only when something is drawn, so that a
        # muted call does not leave an empty figure open.
        if ax is None:
            _, ax = plt.subplots()
        # Plain integers for counts: seaborn's default '.2g' switches
        # to scientific notation from 100 upwards.
        fmt = 'd' if np.issubdtype(mat.dtype, np.integer) else '.2g'
        sns.heatmap(
            mat.T, square=True, annot=annot, fmt=fmt, cbar=False, ax=ax)
        ax.set_xlabel('true labels')
        ax.set_ylabel('predicted labels')

    return mat
def plot_yb_confusion_matrix(
    clf,
    Xt,
    yt,
    labels=None,
    encoder=None,
    savefig=None,
    fig_size=(6, 6),
    **kws
):
    """ Confusion matrix plot using the 'yellowbrick' package.

    Builds a :class:`yellowbrick.classifier.ConfusionMatrix` visualizer
    around `clf`, scores it on ``(Xt, yt)`` and shows it. Be sure
    'yellowbrick' is installed before using the function, otherwise the
    optional-dependency check raises.

    Parameters
    -----------
    clf : classifier estimator
        A scikit-learn estimator that should be a classifier.

    Xt : ndarray or DataFrame of shape n x m
        A matrix of n instances with m features. Preferably, matrix
        represents the test data for error evaluation.

    yt : ndarray or Series of length n
        An array or series of target or class values. Preferably, the
        array represents the test class labels for error evaluation.

    labels : list of str, default: None
        The class labels, forwarded to the visualizer as ``classes``.

    encoder : dict or LabelEncoder, default: None
        A mapping of classes to human readable labels, forwarded to the
        visualizer as ``label_encoder``.

    savefig: str, default =None
        The path to save the figure. When given, the figure is saved
        with ``dpi=300`` and closed instead of being shown.

    fig_size : tuple (width, height), default =(6, 6)
        The matplotlib figure size. Only used when no ``ax`` is supplied.

    kws : dict
        Additional keyword arguments passed to the yellowbrick
        ``ConfusionMatrix`` (e.g. ``sample_weight``, ``percent``). An
        ``ax`` entry, if present, is the matplotlib Axes to draw on; in
        that case no new figure is created.

    Returns
    --------
    cmo: :class:`yellowbrick.classifier.confusion_matrix.ConfusionMatrix`
        The yellowbrick confusion matrix visualizer instance.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> plt.style.use ('classic')
    >>> from watex.datasets import fetch_data
    >>> from watex.exlib.sklearn import train_test_split
    >>> from watex.models import pModels
    >>> from watex.utils.plotutils import plot_yb_confusion_matrix
    >>> # split the data. Note that fetch_data outputs X and y
    >>> X, Xt, y, yt = train_test_split (* fetch_data ('bagoue analysed'),
    ...                                  test_size =.25 )
    >>> # train the model with the best estimator
    >>> pmo = pModels (model ='xgboost' )
    >>> pmo.fit(X, y )
    >>> print(pmo.estimator_ ) # pmo.XGB.best_estimator_
    >>> ypred = pmo.predict(Xt)
    >>> # now plot the score
    >>> plot_yb_confusion_matrix (pmo.XGB.best_estimator_, Xt, yt )
    """
    import_optional_dependency(
        'yellowbrick',
        ("Cannot plot the confusion matrix via 'yellowbrick' package."
         " Alternatively, you may use ufunc `~.plot_confusion_matrix`,"
         " otherwise install it mannually.")
    )
    # A caller-supplied axes must be honoured: creating a new figure
    # unconditionally left an empty stray figure behind in that case.
    ax = kws.pop('ax', None)
    if ax is None:
        fig, ax = plt.subplots(figsize=fig_size)
    else:
        fig = ax.get_figure()

    # NOTE(review): recent yellowbrick releases appear to name this
    # argument ``encoder`` rather than ``label_encoder`` -- confirm
    # against the yellowbrick version the project pins.
    cmo = ConfusionMatrix(
        clf, ax=ax, classes=labels, label_encoder=encoder, **kws)
    cmo.score(Xt, yt)
    cmo.show()

    if savefig is not None:
        fig.savefig(savefig, dpi=300)

    plt.close() if savefig is not None else plt.show()

    return cmo
def plot_confusion_matrices(
    clfs,
    Xt,
    yt,
    annot=True,
    pkg=None,
    normalize='true',
    sample_weight=None,
    encoder=None,
    fig_size=(22, 6),
    savefig=None,
    subplot_kws=None,
    **scorer_kws
):
    """ Plot inline multiple model confusion matrices using either
    scikit-learn or 'yellowbrick'.

    Parameters
    -----------
    clfs : list of classifier estimators
        Scikit-learn classifiers. Each one must be fitted beforehand
        since ``predict`` is called on it.

    Xt : ndarray or DataFrame of shape (M X N)
        A matrix of n instances with m features. Preferably, matrix
        represents the test data for error evaluation.

    yt : ndarray of shape (M, ) or Series of length (M, )
        An array or series of target or class values.

    annot: bool, default=True
        Annotate the number of samples (right or wrong prediction) in
        the plot. Only used with the scikit-learn backend.

    pkg: str, optional, default ='sklearn'
        The library that handles the plot: 'sklearn' (or
        'scikit-learn') or 'yellowbrick' (or 'yb').

    normalize : {'true', 'pred', 'all'}, default='true'
        Normalizes confusion matrix over the true (rows), predicted
        (columns) conditions or all the population. If None, the
        confusion matrix is not normalized. Only used with the
        scikit-learn backend.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights. Only used with the scikit-learn backend.

    encoder : dict or LabelEncoder, default: None
        A mapping of classes to human readable labels. Only used with
        the yellowbrick backend.

    fig_size : tuple (width, height), default =(22, 6)
        The matplotlib figure size given as a tuple of width and height.

    savefig: str, default =None
        The path to save the figure. When given, the figure is saved
        with ``dpi=300`` and closed instead of being shown.

    subplot_kws: dict, default is \
        dict(left=0.0625, right = 0.95, wspace = 0.12)
        Keyword arguments passed to ``Figure.subplots_adjust``.

    scorer_kws: dict
        Accepted for backward compatibility; not used by this function.

    Examples
    ----------
    >>> import matplotlib.pyplot as plt
    >>> plt.style.use ('classic')
    >>> from watex.datasets import fetch_data
    >>> from watex.exlib.sklearn import train_test_split
    >>> from watex.models.premodels import p
    >>> from watex.utils.plotutils import plot_confusion_matrices
    >>> X, Xt, y, yt = train_test_split (* fetch_data ('bagoue analysed'),
    ...                                  test_size =.25 )
    >>> models =[ p.SVM.rbf.best_estimator_,
    ...          p.LogisticRegression.best_estimator_,
    ...          p.RandomForest.best_estimator_ ]
    >>> # now fit all estimators
    >>> fitted_models = [model.fit(X, y) for model in models ]
    >>> plot_confusion_matrices(fitted_models , Xt, yt)
    """
    pkg = pkg or 'sklearn'
    pkg = str(pkg).lower()
    # Kept as an assertion so callers catching AssertionError keep working.
    assert pkg in {"sklearn", "scikit-learn", 'yellowbrick', "yb"}, (
        f" Accepts only 'sklearn' or 'yellowbrick' packages, got {pkg!r}")

    if not is_iterable(clfs):
        clfs = [clfs]

    model_names = [get_estimator_name(name) for name in clfs]

    subplot_kws = subplot_kws or dict(
        left=0.0625, right=0.95, wspace=0.12)
    # Honour the `fig_size` argument; it used to be ignored in favour of
    # a hard-coded (22, 6), which is still the default.
    fig, axes = plt.subplots(1, len(clfs), figsize=fig_size)
    fig.subplots_adjust(**subplot_kws)
    # A single subplot comes back as a bare Axes, not an array.
    if not is_iterable(axes):
        axes = [axes]

    for kk, (model, mname) in enumerate(zip(clfs, model_names)):
        if pkg in ('sklearn', 'scikit-learn'):
            ypred = model.predict(Xt)
            plot_confusion_matrix(
                yt, ypred, annot=annot, ax=axes[kk],
                normalize=normalize, sample_weight=sample_weight)
            axes[kk].set_title(mname)
        else:  # 'yellowbrick' / 'yb'
            plot_yb_confusion_matrix(
                model, Xt, yt, ax=axes[kk], encoder=encoder)

    if savefig is not None:
        # Save the grid figure explicitly: the "current" figure may have
        # changed while the individual matrices were drawn.
        fig.savefig(savefig, dpi=300)

    plt.close() if savefig is not None else plt.show()
def plot_learning_curves(
    models,
    X,
    y,
    *,
    cv=None,
    train_sizes=None,
    baseline_score=0.4,
    scoring=None,
    convergence_line=True,
    fig_size=(20, 6),
    sns_style=None,
    savefig=None,
    set_legend=True,
    subplot_kws=None,
    **kws
):
    """ Horizontal visualization of multiple models learning curves.

    Determines cross-validated training and test scores for different
    training set sizes and draws one subplot per model.

    Parameters
    ----------
    models: list or estimators
        An estimator instance (or a list of them) that implements `fit`
        and `predict`. A cross-validated meta-estimator is replaced by
        its ``best_estimator_``.

    X : array-like of shape (n_samples, n_features)
        Training vector, where `n_samples` is the number of samples and
        `n_features` is the number of features.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Target relative to X for classification or regression.

    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy, passed to
        :func:`sklearn.model_selection.learning_curve`. ``None`` (or
        any falsy value) means 4-fold here.

    train_sizes : array-like of shape (n_ticks,), \
            default=np.linspace(0.1, 1, 50)
        Relative or absolute numbers of training examples that will be
        used to generate the learning curve, passed to
        :func:`sklearn.model_selection.learning_curve`.

    baseline_score: float, default=.4
        Lower limit of the score (y) axis. Must be in ``[0, 1)``.

    scoring : str or callable, default=None
        A str (see model evaluation documentation) or a scorer callable
        object / function with signature ``scorer(estimator, X, y)``.

    convergence_line: bool, default=True
        Display a dashed horizontal line at the mean of the last train
        and validation scores.

    fig_size : tuple (width, height), default =(20, 6)
        The matplotlib figure size given as a tuple of width and height.

    sns_style: str, optional,
        The seaborn style.

    savefig: str, default =None
        The path to save the figure. When given, the figure is saved
        with ``dpi=300`` and closed instead of being shown.

    set_legend: bool, default=True
        Display the legend in each subplot (location 'best').

    subplot_kws: dict, default is \
        dict(left=0.0625, right = 0.95, wspace = 0.1)
        Keyword arguments passed to ``Figure.subplots_adjust``.

    kws: dict,
        Keyword arguments passed to
        :func:`sklearn.model_selection.learning_curve`.

    Raises
    ------
    ValueError
        If `baseline_score` is not in ``[0, 1)``.

    Examples
    ---------
    (1) -> plot via a metaestimator already cross-validated.

    >>> from watex.models.premodels import p
    >>> from watex.datasets import fetch_data
    >>> from watex.utils.plotutils import plot_learning_curves
    >>> X, y = fetch_data ('bagoue prepared') # yields a sparse matrix
    >>> models = [p.SVM.linear , p.SVM.rbf , p.SVM.sigmoid , p.SVM.poly ]
    >>> plot_learning_curves (models, X, y, cv=4, sns_style = 'darkgrid')

    (2) -> plot with multiples models not crossvalidated yet.

    >>> from watex.exlib.sklearn import (LogisticRegression,
    ...                                  RandomForestClassifier,
    ...                                  SVC , KNeighborsClassifier )
    >>> models =[LogisticRegression(), RandomForestClassifier(), SVC() ,
    ...          KNeighborsClassifier() ]
    >>> plot_learning_curves (models, X, y, cv=4, sns_style = 'darkgrid')
    """
    if not is_iterable(models):
        models = [models]

    subplot_kws = subplot_kws or dict(
        left=0.0625, right=0.95, wspace=0.1)
    # `x or default` is ambiguous for numpy arrays; test for None instead.
    if train_sizes is None:
        train_sizes = np.linspace(0.1, 1, 50)
    cv = cv or 4

    # The two conditions are mutually exclusive, so they must be OR-ed
    # for the validation to ever trigger.
    if baseline_score >= 1 or baseline_score < 0:
        raise ValueError("Score for the base line must be less 1 and "
                         f"greater than 0; got {baseline_score}")

    if sns_style:
        _set_sns_style(sns_style)

    mnames = [get_estimator_name(n) for n in models]

    fig, axes = plt.subplots(nrows=1, ncols=len(models), figsize=fig_size)
    # for consistency, put axes on list when a single model is provided
    if not is_iterable(axes):
        axes = [axes]
    axes = list(axes)
    fig.subplots_adjust(**subplot_kws)

    for ax, model, name in zip(axes, models, mnames):
        cmodel = (model.best_estimator_ if _is_cross_validated(model)
                  else model)

        # Use the requested sizes instead of a hard-coded linspace.
        N, train_lc, val_lc = learning_curve(
            cmodel, X, y, train_sizes=train_sizes,
            cv=cv, scoring=scoring, **kws)

        ax.plot(N, np.mean(train_lc, 1), color="blue",
                label="train score")
        ax.plot(N, np.mean(val_lc, 1), color="r",
                label="validation score")
        if convergence_line:
            ax.hlines(np.mean([train_lc[-1], val_lc[-1]]),
                      N[0], N[-1], color="k", linestyle="--")
        ax.set_ylim(baseline_score, 1)
        ax.set_xlabel("training size")
        ax.set_title(name, size=14)
        if set_legend:
            ax.legend(loc='best')

    # only the left-most subplot carries the y label
    axes[0].set_ylabel("score")

    if savefig is not None:
        plt.savefig(savefig, dpi=300)

    plt.close() if savefig is not None else plt.show()
def plot_naive_dendrogram(
    X,
    *ybounds,
    fig_size=(12, 5),
    savefig=None,
    **kws
):
    """ Quick plot dendrogram using the ward clustering function from Scipy.

    :param X: ndarray of shape (n_samples, n_features)
        Array of features
    :param ybounds: int, integer values given by pairs ``(y0, y1)``. Each
        pair draws one dashed cluster line from ``y0`` (left edge of the
        plot) to ``y1`` (right edge). A trailing unpaired value is
        ignored; a single value only triggers a warning.
    :param fig_size: tuple (width, height), default =(12,5)
        the matplotlib figure size given as a tuple of width and height
    :param savefig: str, optional. Path to save the figure. When given,
        the figure is saved with ``dpi=300`` and closed instead of shown.
    :param kws: dict ,
        Additional keyword arguments passed to
        :func:`scipy.cluster.hierarchy.dendrogram`
    :raises TypeError: if a value of `ybounds` cannot be cast to int.

    :Examples:
        >>> from watex.datasets import fetch_data
        >>> from watex.utils.plotutils import plot_naive_dendrogram
        >>> X, _= fetch_data('Bagoue analysed') # data is already scaled
        >>> # get the two features 'power' and 'magnitude'
        >>> data = X[['power', 'magnitude']]
        >>> plot_naive_dendrogram(data )
        >>> # add the horizontal line of the cluster at ybounds = (20 , 20 )
        >>> # for a single cluster (cluster 1)
        >>> plot_naive_dendrogram(data , 20, 20 )

    """
    msg = (". Note that the bounds in y-axis are the y-coordinates for"
           " horizontal lines regarding to the number of clusters that"
           " might be cutted.")
    try:
        ybounds = [int(a) for a in ybounds]
    except (TypeError, ValueError) as typerror:
        raise TypeError(str(typerror) + msg) from typerror

    # the scipy ward function returns an array that specifies the
    # distance bridged when performing agglomerative clustering
    linkage_array = ward(X)

    # plot the dendrogram for the linkage array
    # containing the distances between clusters
    dendrogram(linkage_array, **kws)

    # Resize the figure that was just drawn. Assigning to
    # rcParams["figure.figsize"] here would not affect it and would
    # silently change the size of every later figure.
    plt.gcf().set_size_inches(fig_size)
    ax = plt.gca()

    if ybounds:
        if len(ybounds) <= 1:
            # One value cannot form a (left, right) pair: warn and draw
            # nothing rather than failing while unpacking it.
            warnings.warn(
                f"axis y bound might be greater than {len(ybounds)}")
            pairs = []
        else:
            # consecutive values form (left, right) pairs
            pairs = list(zip(ybounds[0::2], ybounds[1::2]))

        bounds = ax.get_xbound()
        for i, (y_left, y_right) in enumerate(pairs):
            ax.plot(bounds, [y_left, y_right], '--', c='k')
            ax.text(bounds[1], y_right, f"cluster {i +1:02}",
                    va='center', fontdict={'size': 15})

    # get xticks and format labels
    xticks_loc = list(ax.get_xticks())
    _get_xticks_formatage(ax, xticks_loc, space=14)

    plt.xlabel("Sample index ")
    plt.ylabel("Cluster distance")

    if savefig is not None:
        plt.savefig(savefig, dpi=300)

    plt.close() if savefig is not None else plt.show()
def plot_pca_components(
    components,
    *,
    feature_names=None,
    cmap='viridis',
    savefig=None,
    **kws
):
    """ Visualize the coefficient of principal component analysis (PCA)
    as a heatmap.

    :param components: Ndarray, shape (n_components, n_features) or PCA
        object. Array of the PCA components or an object exposing
        ``components_`` (e.g. from
        :class:`watex.analysis.dimensionality.nPCA`). If the object also
        exposes ``feature_names_in_``, `feature_names` is taken from it.
    :param feature_names: list or str, optional
        Names used as x tick labels. Must have as many items as there
        are features; otherwise a warning is issued and no label is set.
    :param cmap: str, default='viridis'
        the matplotlib color map for matshow visualization.
    :param savefig: str, optional. Path to save the figure. When given,
        the figure is saved with ``dpi=300`` and closed instead of shown.
    :param kws: dict,
        Additional keywords arguments passed to
        :func:`matplotlib.pyplot.matshow`
    :raises TypeError: if `components` is a sparse matrix.

    :Examples:
    (1)-> with PCA object

    >>> from watex.datasets import fetch_data
    >>> from watex.utils.plotutils import plot_pca_components
    >>> from watex.analysis import nPCA
    >>> X, _= fetch_data('bagoue pca')
    >>> pca = nPCA (X, n_components=2, return_X =False)# to return object
    >>> plot_pca_components (pca)

    (2)-> use the components and features individually

    >>> components = pca.components_
    >>> features = pca.feature_names_in_
    >>> plot_pca_components (components, feature_names= features,
    ...                      cmap='jet_r')

    """
    if sp.issparse(components):
        raise TypeError("Sparse array is not supported for PCA "
                        "components visualization.")
    # if pca object is given, get the features names
    if hasattr(components, "feature_names_in_"):
        feature_names = list(getattr(components, "feature_names_in_"))

    if not hasattr(components, "__array__"):
        components = _check_array_in(components, 'components_')

    n_components, n_features = np.shape(components)[:2]

    plt.matshow(components, cmap=cmap, **kws)
    # One tick per component. The first two labels are the historical
    # ones; further components get a generic numbered label.
    row_labels = ['First component', 'Second component'] + [
        f'Component {i}' for i in range(3, n_components + 1)]
    plt.yticks(range(n_components), row_labels[:n_components])
    cb = plt.colorbar()
    cb.set_label('Coeff value')

    # Only validate names that were actually provided; wrapping None in
    # a list used to trigger a misleading length warning.
    if feature_names is not None:
        if isinstance(feature_names, str) or not is_iterable(feature_names):
            feature_names = [feature_names]
        feature_names = list(feature_names)
        if len(feature_names) != n_features:
            warnings.warn(
                "Number of features and eigenvectors must"
                " be consistent, expect {0}, got {1}".format(
                    n_features, len(feature_names)))
            feature_names = None

    if feature_names is not None:
        plt.xticks(range(len(feature_names)),
                   feature_names, rotation=60, ha='left')

    plt.xlabel("Feature")
    plt.ylabel("Principal components")

    if savefig is not None:
        plt.savefig(savefig, dpi=300)

    plt.close() if savefig is not None else plt.show()
def plot_clusters(
    n_clusters,
    X,
    y_pred,
    cluster_centers=None,
    savefig=None,
):
    """ Visualize the cluster that k-means identified in the dataset

    :param n_clusters: int, number of cluster to visualize
    :param X: NDArray, data containing the features, expect to be a two
        dimensional data with at least two columns; the first two
        columns are plotted.
    :param y_pred: array-like, array containing the predicted class
        labels. Samples labelled ``n`` are drawn as "Cluster n+1".
    :param cluster_centers: NDArray containing the coordinates of the
        centroids or the similar points with continuous features.
    :param savefig: str, optional. Path to save the figure. When given,
        the figure is saved with ``dpi=300`` and closed instead of shown.
    :raises ValueError: if `X` is not two dimensional or has a single
        column.

    :Example:
    >>> from watex.exlib.sklearn import KMeans, MinMaxScaler
    >>> from watex.utils.plotutils import plot_clusters
    >>> from watex.datasets import fetch_data
    >>> h= fetch_data('hlogs').frame
    >>> # collect two features 'resistivity' and gamma-gamma logging values
    >>> h2 = h[['resistivity', 'gamma_gamma']]
    >>> km = KMeans (n_clusters =3 , init= 'random' )
    >>> # scaled the data with MinMax scaler i.e. between ( 0-1)
    >>> h2_scaled = MinMaxScaler().fit_transform(h2)
    >>> ykm = km.fit_predict(h2_scaled )
    >>> plot_clusters (3 , h2_scaled, ykm , km.cluster_centers_ )

    """
    n_clusters = int(
        _assert_all_types(n_clusters, int, float, objname="'n_clusters'")
    )
    X, y_pred = check_X_y(X, y_pred)

    if np.ndim(X) != 2 or X.shape[1] == 1:
        # Only look at shape[1] when it exists (X really is 2-D).
        ndim = np.ndim(X) if np.ndim(X) != 2 else 1
        raise ValueError(
            f"X is expected to be a two dimensional data. Got {ndim}!")
    # for consistency, convert y to array
    y_pred = np.array(y_pred)

    colors = make_mpl_properties(n_clusters)
    markers = make_mpl_properties(n_clusters, 'markers')

    for n, (color, marker) in enumerate(zip(colors, markers)):
        members = y_pred == n
        plt.scatter(X[members, 0],
                    X[members, 1],
                    s=50, c=color,
                    marker=marker,
                    # the unfilled 'x' marker takes no edge colour
                    edgecolors=None if marker == 'x' else 'black',
                    label=f'Cluster {n +1}')

    if cluster_centers is not None:
        cluster_centers = np.array(cluster_centers)
        plt.scatter(cluster_centers[:, 0],
                    cluster_centers[:, 1],
                    s=250., marker='*',
                    c='red', edgecolors='black',
                    label='centroids')
    plt.legend(scatterpoints=1)
    plt.grid()
    plt.tight_layout()

    if savefig is not None:
        # Save through pyplot: the path used to be passed where a figure
        # object was expected, which failed with AttributeError.
        plt.savefig(savefig, dpi=300)

    plt.close() if savefig is not None else plt.show()
def plot_elbow(
    X,
    n_clusters,
    n_init=10,
    max_iter=300,
    random_state=42,
    fig_size=(10, 4),
    marker='o',
    savefig=None,
    **kwd
):
    """ Plot elbow method to find the optimal number of cluster, k',
    for a given data.

    A :class:`KMeans` model (``init='k-means++'``) is fitted for every
    ``k`` in ``range(1, n_clusters)`` and its inertia is plotted
    against ``k``.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Training instances to cluster.

    n_clusters : int
        Exclusive upper bound of the number of clusters to try, i.e.
        ``n_clusters=11`` evaluates k = 1, ..., 10.

    n_init : int, default=10
        Number of time the k-means algorithm will be run with different
        centroid seeds.

    max_iter : int, default=300
        Maximum number of iterations of the k-means algorithm for a
        single run.

    random_state : int, RandomState instance or None, default=42
        Determines random number generation for centroid initialization.

    fig_size : tuple (width, height), default=(10, 4)
        The matplotlib figure size.

    marker: str, default='o',
        cluster marker point.

    savefig: str, default =None
        The path to save the figure.

    kwd: dict
        Additional keywords arguments passed to
        :func:`matplotlib.pyplot.plot`

    Returns
    --------
    ax: Matplotlib.pyplot axes objects

    Example
    ---------
    >>> from watex.datasets import load_hlogs
    >>> from watex.utils.plotutils import plot_elbow
    >>> # get only the resistivity and gamma-gamma values for example
    >>> res_gamma = load_hlogs ().frame[['resistivity', 'gamma_gamma']]
    >>> plot_elbow(res_gamma, n_clusters=11)

    """
    # Use the caller's value; it used to be overwritten by a literal 11.
    n_clusters = int(n_clusters)

    distorsions = []
    for k in range(1, n_clusters):
        km = KMeans(n_clusters=k,
                    init='k-means++',
                    n_init=n_init,
                    max_iter=max_iter,
                    random_state=random_state)
        km.fit(X)
        distorsions.append(km.inertia_)

    ax = _plot_elbow(distorsions, n_clusters=n_clusters,
                     fig_size=fig_size, marker=marker,
                     savefig=savefig, **kwd)

    return ax
def _plot_elbow(distorsions: list, n_clusters: int, fig_size=(10, 4),
                marker='o', savefig=None, **kwd):
    """ Draw the elbow curve (distortion against number of clusters).

    :param distorsions: list - sum-squared-error (SSE) values, also
        called `inertia_` in scikit-learn, one per tested ``k``. It is
        plotted against ``range(1, n_clusters)``, so it is expected to
        hold ``n_clusters - 1`` values.
    :param n_clusters: int - exclusive upper bound of the tested ``k``.
    :param fig_size: tuple (width, height) of the figure.
    :param marker: str - marker of each point of the curve.
    :param savefig: str, optional - path handed to :func:`savefigure`.
        The figure is shown when it is None, closed otherwise.
    :param kwd: additional keyword arguments passed to ``Axes.plot``.

    :returns: ax: Matplotlib.pyplot axes objects

    :Example:
    >>> import numpy as np
    >>> from sklearn.cluster import KMeans
    >>> from watex.datasets import load_iris
    >>> from watex.utils.plotutils import _plot_elbow
    >>> d= load_iris ()
    >>> X= d.data [:, 0][:, np.newaxis] # take the first axis
    >>> # compute distorsions for KMeans range
    >>> distorsions =[] ; n_clusters = 11
    >>> for i in range (1, n_clusters ):
    ...     km =KMeans (n_clusters =i , init= 'k-means++',
    ...                 n_init=10 , max_iter=300,
    ...                 random_state =0 )
    ...     km.fit(X)
    ...     distorsions.append(km.inertia_)
    >>> _plot_elbow (distorsions, n_clusters =n_clusters)

    """
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=fig_size)

    k_values = range(1, n_clusters)
    ax.plot(k_values, distorsions, marker=marker, **kwd)

    plt.xlabel("Number of clusters")
    plt.ylabel("Distorsion")
    plt.tight_layout()

    if savefig is None:
        plt.show()
    else:
        savefigure(fig, savefig)
        plt.close()

    return ax
def plot_cost_vs_epochs(regs, *, fig_size=(10, 4), marker='o',
                        savefig=None, **kws):
    """ Plot the cost against the number of epochs, one subplot per
    estimator (e.g. for different learning rates).

    Parameters
    ----------
    regs: Callable, single or list of regression estimators
        Estimators should be already fitted, i.e. expose the ``cost_``
        attribute. ``eta`` is read for the subplot title.
    fig_size: tuple , default is (10, 4)
        the size of figure
    marker: str, default='o'
        marker of each point of the curves.
    savefig: str, optional
        path handed to :func:`savefigure`. The figure is shown when it
        is None, closed otherwise.
    kws: dict ,
        Additional keywords arguments passed to
        :func:`matplotlib.pyplot.plot`

    Returns
    -------
    ax: Matplotlib.pyplot axes objects
        A single Axes for one estimator, otherwise a 1-D array of Axes.

    Raises
    ------
    ValueError
        If `regs` is empty or mixes fitted and unfitted estimators.
    TypeError
        If no item of `regs` exposes ``cost_``.

    Examples
    ---------
    >>> from watex.datasets import load_iris
    >>> from watex.base import AdalineGradientDescent
    >>> from watex.utils.plotutils import plot_cost_vs_epochs
    >>> X, y = load_iris (return_X_y= True )
    >>> ada1 = AdalineGradientDescent (n_iter= 10 , eta= .01 ).fit(X, y)
    >>> ada2 = AdalineGradientDescent (n_iter=10 , eta =.0001 ).fit(X, y)
    >>> plot_cost_vs_epochs (regs = [ada1, ada2] )
    """
    # np.ndarray is the type; np.array is a function and made isinstance
    # raise TypeError for any non list/tuple input.
    if not isinstance(regs, (list, tuple, np.ndarray)):
        regs = [regs]

    # `cost_` is what the plot reads, so that is what "fitted" means
    # here. (Checking `__class__` was true for every Python object.)
    fitted = {hasattr(o, 'cost_') for o in regs}
    if len(fitted) != 1:
        raise ValueError("All regression models should be estimators"
                         " already fitted.")
    if not fitted.pop():
        raise TypeError(
            f"Needs an estimator, got {type(regs[0]).__name__!r}")

    # squeeze=False always yields a 2-D array so that a single estimator
    # can be indexed like several.
    fig, grid = plt.subplots(nrows=1, ncols=len(regs),
                             figsize=fig_size, squeeze=False)
    axes = grid.ravel()

    for axis, m in zip(axes, regs):
        axis.plot(range(1, len(m.cost_) + 1), np.log10(m.cost_),
                  marker=marker, **kws)
        axis.set_xlabel("Epochs")
        axis.set_ylabel("Log(sum-squared-error)")
        axis.set_title("%s -Learning rate %.4f" % (
            m.__class__.__name__, m.eta))

    if savefig is not None:
        savefigure(fig, savefig)

    plt.show() if savefig is None else plt.close()

    # Same shape as plt.subplots would return by default.
    return axes if len(axes) > 1 else axes[0]
def plot_mlxtend_heatmap(df, columns=None, savefig=None, **kws):
    """ Plot correlation matrix array as a heat map

    :param df: dataframe pandas
    :param columns: list of features,
        If given, only the dataframe with that features is considered.
        All the columns of `df` are used when it is None.
    :param savefig: str, optional. Path to save the figure. When given,
        the figure is saved with ``dpi=300`` and closed instead of shown.
    :param kws: additional keyword arguments passed to
        :func:`mlxtend.plotting.heatmap`
    :return: the object(s) returned by :func:`mlxtend.plotting.heatmap`

    :example:

        >>> from watex.datasets import load_hlogs
        >>> from watex.utils.plotutils import plot_mlxtend_heatmap
        >>> h=load_hlogs()
        >>> features = ['gamma_gamma', 'sp',
        ...             'natural_gamma', 'resistivity']
        >>> plot_mlxtend_heatmap (h.frame , columns =features, cmap ='PuOr')
    """
    import_optional_dependency('mlxtend', extra=(
        "Can't plot heatmap using 'mlxtend' package."))
    from mlxtend.plotting import heatmap

    # The documented default (None) used to fail in `df[None]`.
    if columns is None:
        columns = list(df.columns)
    elif isinstance(columns, str):
        columns = [columns]
    else:
        columns = list(columns)

    cm = np.corrcoef(df[columns].values.T)
    ax = heatmap(cm, row_names=columns, column_names=columns, **kws)

    if savefig is not None:
        # Save through pyplot: the path used to be passed where a figure
        # object was expected, which failed with AttributeError.
        plt.savefig(savefig, dpi=300)

    plt.close() if savefig is not None else plt.show()

    return ax
def plot_mlxtend_matrix(df, columns=None, fig_size=(10, 8),
                        alpha=.5, savefig=None):
    """ Visualize the pair wise correlation between the different
    features in the dataset in one place.

    :param df: dataframe pandas
    :param columns: list of features,
        If given, only the dataframe with that features is considered.
        All the columns of `df` are used when it is None.
    :param fig_size: tuple of int (width, height)
        Size of the displayed figure
    :param alpha: figure transparency, default is ``.5``.
    :param savefig: str, optional. Path to save the figure. When given,
        the figure is saved with ``dpi=300`` and closed instead of shown.
    :raises TypeError: if `columns` is neither None, a string nor an
        iterable.

    :return: the object(s) returned by
        :func:`mlxtend.plotting.scatterplotmatrix`
    :example:
        >>> from watex.datasets import load_hlogs
        >>> from watex.utils.plotutils import plot_mlxtend_matrix
        >>> import pandas as pd
        >>> import numpy as np
        >>> h=load_hlogs()
        >>> features = ['gamma_gamma', 'natural_gamma', 'resistivity']
        >>> data = pd.DataFrame ( np.log10 (h.frame[features]),
        ...                      columns =features )
        >>> plot_mlxtend_matrix (data, columns =features)

    """
    import_optional_dependency("mlxtend", extra=(
        "Can't plot the scatter matrix using 'mlxtend' package."))
    from mlxtend.plotting import scatterplotmatrix

    # The documented default (None) used to be rejected as non-iterable.
    if columns is None:
        columns = list(df.columns)
    elif isinstance(columns, str):
        columns = [columns]
    else:
        try:
            columns = list(columns)
        except TypeError as err:
            raise TypeError(" Columns should be an iterable object, not"
                            f" {type (columns).__name__!r}") from err

    ax = scatterplotmatrix(df[columns].values, figsize=fig_size,
                           names=columns, alpha=alpha)
    plt.tight_layout()

    if savefig is not None:
        # Save through pyplot: the path used to be passed where a figure
        # object was expected, which failed with AttributeError.
        plt.savefig(savefig, dpi=300)

    plt.close() if savefig is not None else plt.show()

    return ax
def savefigure(fig: object, figname: str = None, ext: str = '.png', **skws):
    """ save figure from the given figure name

    :param fig: Matplotlib figure object (anything exposing ``savefig``)
    :param figname: name of figure to output. When None, a name built
        from this module's file name and the current date/time is used
        and a warning is issued. When it has no extension, `ext` is
        appended.
    :param ext: str - extension of the figure, with or without the
        leading dot; it is lower-cased.
    :param skws: additional keyword arguments passed to ``fig.savefig``
    :return: whatever ``fig.savefig`` returns.
    """
    ext = '.' + str(ext).lower().strip().replace('.', '')

    if figname is None:
        # splitext returns (root, ext): only the root is wanted. The
        # timestamp avoids ':' which is not allowed in Windows names.
        module_name = os.path.splitext(os.path.basename(__file__))[0]
        figname = ('_' + module_name
                   + datetime.datetime.now().strftime('%m-%d-%Y_%H-%M-%S')
                   + ext)
        warnings.warn("No name of figure is given. Figure should be renamed as "
                      f"{figname!r}")

    file, ex = os.path.splitext(figname)
    if ex in ('', None):
        # Append the extension; os.path.join would have produced
        # "<name>/.png", i.e. a hidden file in a sub-directory.
        figname = file + ext

    return fig.savefig(figname, **skws)
def resetting_ticks(get_xyticks, number_of_ticks=None):
    """
    resetting xyticks modulo 10.

    With more than two ticks, the second tick is rounded up and the
    second-to-last tick is rounded down to a multiple of 10, and
    `number_of_ticks` evenly spaced values are returned between them.
    With two ticks or fewer, the ticks are returned unchanged as an
    array. The input is never modified.

    :param get_xyticks: xyticks list, as given by ax.get_x|yticks()
    :type get_xyticks: list or ndarray
    :param number_of_ticks: number of ticks wanted on the x or y axis.
        Default is ``int((len(get_xyticks) - 1) / 2)`` when there are
        more than two ticks, ``len(get_xyticks)`` otherwise.
    :type number_of_ticks: int
    :returns: a new array of ticks
    :rtype: array_like
    :raises TipError: if `get_xyticks` is not a list or an ndarray.
    :raises PlotError: if `number_of_ticks` cannot be cast to int.
    """
    if not isinstance(get_xyticks, (list, np.ndarray)):
        warnings.warn(
            'Arguments get_xyticks must be a list'
            ' not <{0}>.'.format(type(get_xyticks)))
        raise TipError(
            '<{0}> found. "get_xyticks" must be a '
            'list or (nd.array,1).'.format(type(get_xyticks)))

    if number_of_ticks is None:
        if len(get_xyticks) > 2:
            number_of_ticks = int((len(get_xyticks) - 1) / 2)
        else:
            number_of_ticks = len(get_xyticks)

    # `isinstance` was missing, which made this validation unreachable.
    if not isinstance(number_of_ticks, (float, int)):
        try:
            number_of_ticks = int(number_of_ticks)
        except (TypeError, ValueError):
            warnings.warn('"Number_of_ticks" arguments is the times to see '
                          'the ticks on x|y axis.'
                          ' Must be integer not <{0}>.'.
                          format(type(number_of_ticks)))
            raise PlotError(f'<{type(number_of_ticks).__name__}> detected.'
                            ' Must be integer.')

    number_of_ticks = int(number_of_ticks)

    if len(get_xyticks) <= 2:
        # Nothing to round between the bounds (this also covers exactly
        # two ticks, which used to leave the result undefined).
        return np.array(get_xyticks)

    # Work on a copy so the caller's ticks are left untouched.
    ticks = (get_xyticks.copy() if isinstance(get_xyticks, np.ndarray)
             else list(get_xyticks))
    if ticks[1] % 10 != 0:
        ticks[1] = ticks[1] + (10 - ticks[1] % 10)
    if ticks[-2] % 10 != 0:
        ticks[-2] = ticks[-2] - ticks[-2] % 10

    return np.linspace(ticks[1], ticks[-2], number_of_ticks)
def make_mpl_properties(n, prop='color'):
    """ make matplotlib property ('colors', 'marker', 'line') to fit the
    number of samples

    The items are taken from the module defaults (``D_COLORS``,
    ``D_MARKERS`` followed by the matplotlib ``Line2D`` markers, or
    ``D_STYLES``) and repeated cyclically until `n` items are collected.

    :param n: int, Number of property that is needed to create. It
        generates a group of property items.
    :param prop: str, default='color', name of property to retrieve.
        Accepts only 'colors', 'marker' or 'line' (case-insensitive,
        with or without the plural 's').

    :return: list of property items with size equals to `n`.
    :raises ValueError: if `prop` is not one of the accepted names.
    :Example:
        >>> from watex.utils.plotutils import make_mpl_properties
        >>> make_mpl_properties (10 )
        ... ['g', 'gray', 'y', 'blue', 'orange', 'purple', 'lime',
             'k', 'cyan', (0.6, 0.6, 0.6)]
        >>> make_mpl_properties(100 , prop = 'marker')
        ... ['o', '^', 'x', 'D', . . . 11, 'None', None, ' ', '']
        >>> make_mpl_properties(50 , prop = 'line')
        ... ['-', '-', '--', '-.', . . . 'solid', 'dashed',
             'dashdot', 'dotted']

    """
    n = int(_assert_all_types(n, int, float, objname="'n'"))
    given = prop
    # normalise e.g. 'Colors', ' markers ' to 'color', 'marker'
    prop = str(prop).lower().strip().replace('s', '')
    if prop not in ('color', 'marker', 'line'):
        # f-string so that the offending value really shows up
        raise ValueError(f"Property {given!r} is not availabe yet. , Expect"
                         " 'colors', 'marker' or 'line'.")

    if prop == 'color':
        pool = mpl.colors.ListedColormap(D_COLORS[:n]).colors
    elif prop == 'marker':
        pool = D_MARKERS + list(mpl.lines.Line2D.markers.keys())
    else:  # 'line'
        pool = D_STYLES

    # repeat the pool as many times as needed to deliver n items
    return list(itertools.islice(itertools.cycle(pool), n))
def resetting_colorbar_bound(cbmax, cbmin, number_of_ticks=5,
                             logscale=False):
    """
    Function to reset colorbar ticks more easy to read

    When `cbmax` is a multiple of `cbmin`, the ticks are simply evenly
    spaced from `cbmin` to `cbmax`. Otherwise they are evenly spaced
    between the bounds moved inwards to multiples of the rounding step
    (10, or 1 in log scale), and every tick that is neither a multiple
    of the step nor of half the step is rounded down to the step.

    :param cbmax: value maximum of colorbar
    :type cbmax: float
    :param cbmin: minimum data value
    :type cbmin: float
    :param number_of_ticks: number of ticks should be
                        located on the color bar. Default is 5.
    :type number_of_ticks: int
    :param logscale: set to True if your data are logarithmic data.
    :type logscale: bool

    :returns: array of color bar ticks value.
    :rtype: array_like
    :raises TipError: if `number_of_ticks` cannot be cast to int.
    """
    # `isinstance` was missing, which made this validation unreachable.
    if not isinstance(number_of_ticks, (float, int)):
        try:
            number_of_ticks = int(number_of_ticks)
        except (TypeError, ValueError):
            warnings.warn('"Number_of_ticks" arguments '
                          'is the times to see the ticks on x|y axis.'
                          ' Must be integer not <{0}>.'.format(
                              type(number_of_ticks)))
            raise TipError('<{0}> detected. Must be integer.'.format(
                type(number_of_ticks).__name__))

    number_of_ticks = int(number_of_ticks)

    # rounding step: 10 in linear scale, log10(10) == 1 in log scale
    mod10 = np.log10(10) if logscale is True else 10

    def round_modulo10(value):
        """ round down to the step unless `value` already is a multiple
        of the step or of half the step. """
        if value % mod10 == 0 or value % (mod10 / 2) == 0:
            return value
        return value - value % mod10

    # A zero minimum cannot divide anything: treat it as "not a
    # multiple" instead of failing with ZeroDivisionError.
    if cbmin != 0 and cbmax % cbmin == 0:
        return np.linspace(cbmin, cbmax, number_of_ticks)

    startpoint = cbmin + (mod10 - cbmin % mod10)
    endpoint = cbmax - cbmax % mod10
    return np.array(
        [round_modulo10(ii) for ii in np.linspace(
            startpoint, endpoint, number_of_ticks)]
    )
def controle_delineate_curve(res_deline=None, phase_deline=None):
    """ Control the given delineate values and return their ceiling.

    Resistivity values are converted to log10 before the ceiling is taken;
    phase values are only ceiled. The first argument that is not ``None``
    (resistivity first, then phase) determines the returned value.

    :param res_deline: resistivity value to delineate. Unit in `ohm.m`
    :type res_deline: float|int|list

    :param phase_deline: phase value to delineate, unit in degree
    :type phase_deline: float|int|list

    :returns: delineate resistivity or phase values, or ``None`` when
        nothing usable is given.
    :rtype: array_like
    :raises TipError: if a value cannot be converted to float.
    """
    # One label per argument. The original single string
    # 'resistivity, phase' made ``fmt[1]`` raise IndexError while the
    # error message for a bad phase value was being built.
    fmt = ['resistivity', 'phase']

    for ii, xx_deline in enumerate([res_deline, phase_deline]):
        if xx_deline is None:
            continue

        if isinstance(xx_deline, (float, int, str)):
            try:
                xx_deline = float(xx_deline)
            except (TypeError, ValueError) as err:
                raise TipError(
                    'Value <{0}> to delineate <{1}> is unacceptable.'
                    ' Please ckeck your value.'.format(
                        xx_deline, fmt[ii])) from err
            if ii == 0:
                return [np.ceil(np.log10(xx_deline))]
            return [np.ceil(xx_deline)]

        if isinstance(xx_deline, (list, tuple, np.ndarray)):
            try:
                if ii == 0:
                    return [np.ceil(np.log10(float(xx)))
                            for xx in xx_deline]
                return [np.ceil(float(xx)) for xx in xx_deline]
            except (TypeError, ValueError) as err:
                raise TipError(
                    'Value to delineate <{0}> is unacceptable.'
                    ' Please ckeck your value.'.format(fmt[ii])) from err
def fmt_text(data_text, fmt='~', leftspace=3, return_to_line=77):
    """ Format a report text framed with an underline character.

    The text is wrapped every `return_to_line` characters and enclosed
    between two rules made of `fmt`.

    :param data_text: a long text
    :type data_text: str

    :param fmt: character used for the rules and the line borders
    :type fmt: str

    :param leftspace: number of spaces before the report starts
    :type leftspace: int

    :param return_to_line: number of characters before returning to line
    :type return_to_line: int

    :returns: the framed text. An empty `data_text` yields only the opening
        rule followed by the left margin.
    :rtype: str
    """
    return_to_line = int(return_to_line)
    begin_text = leftspace * ' '
    rule = begin_text + fmt * (return_to_line + 7)

    # collect the pieces and join once instead of growing a string
    parts = [rule, '\n', begin_text]
    last = len(data_text) - 1
    ss = 0  # number of characters already written on the current line

    for ii, num in enumerate(data_text):
        if ii == last:
            # Emit the final character before closing the frame; the
            # original broke out of the loop first and silently lost it.
            parts.append(num)
            parts.append(' {0}\n'.format(fmt) + rule + '\n')
            break
        if ss == return_to_line:
            # hyphenate when the line break falls inside a word
            if data_text[ii + 1] != ' ':
                parts.append(' {0}- \n {1} '.format(fmt, begin_text + fmt))
            else:
                parts.append(' {0} \n {1} '.format(fmt, begin_text + fmt))
            ss = 0
        parts.append(num)
        ss += 1

    return ''.join(parts)
def plotvec1(u, z, v):
    """ Plot three vectors as arrows starting from the origin.

    :param u: vector u, drawn in red
    :type u: array_like

    :param z: vector z, drawn with the default arrow color
    :type z: array_like

    :param v: vector v, drawn in blue
    :type v: array_like

    The arrows are drawn on a new axes whose limits are fixed to [-2, 2]
    on both axes. Nothing is returned.
    """
    axes = plt.axes()
    # (vector, label, extra arrow style) in drawing order
    arrows = (
        (u, 'u', {'color': 'r'}),
        (v, 'v', {'color': 'b'}),
        (z, 'z', {}),
    )
    for vec, tag, extra in arrows:
        axes.arrow(0, 0, *vec, head_width=0.05, head_length=0.1, **extra)
        # label slightly offset from the arrow head
        plt.text(*(vec + 0.1), tag)
    plt.ylim(-2, 2)
    plt.xlim(-2, 2)
def plotvec2(a, b):
    """ Plot two vectors as arrows starting from the origin.

    Handy to visualize the orthogonality of two vectors.

    :param a: first vector, drawn in red
    :type a: array_like

    :param b: second vector, drawn in blue
    :type b: array_like

    :Example:
        >>> import numpy as np
        >>> from watex.utils.plotutils import plotvec2
        >>> a=np.array([1,0])
        >>> b=np.array([0,1])
        >>> plotvec2(a,b)
        >>> print('the product a to b is =', np.dot(a,b))
    """
    axes = plt.axes()
    for vec, tag, shade in ((a, 'a', 'r'), (b, 'b', 'b')):
        axes.arrow(0, 0, *vec, head_width=0.05, color=shade,
                   head_length=0.1)
        # label slightly offset from the arrow head
        plt.text(*(vec + 0.1), tag)
    plt.ylim(-2, 2)
    plt.xlim(-2, 2)
def plot_errorbar(
    ax,
    x_ar,
    y_ar,
    y_err=None,
    x_err=None,
    color='k',
    marker='x',
    ms=2,
    ls=':',
    lw=1,
    e_capsize=2,
    e_capthick=.5,
    picker=None,
    **kws
):
    """
    Convenience function to draw an error bar plot on an axes.

    Parameters
    ------------
    ax: matplotlib.axes instance
        axes to put the error bar plot on
    x_ar: np.ndarray(nx)
        array of x values to plot
    y_ar: np.ndarray(nx)
        array of y values to plot
    y_err: np.ndarray(nx)
        array of errors in y-direction to plot
    x_err: np.ndarray(ns)
        array of errors in x-direction to plot
    color: string or (r, g, b)
        color of marker, line and error bar
    marker: string
        marker type to plot data as
    ms: float
        size of marker
    ls: string
        line style between markers
    lw: float
        width of the line between markers; also used for the marker edge
        width and the error bar line width
    e_capsize: float
        size of error bar cap
    e_capthick: float
        thickness of error bar cap. Accepted for API compatibility but
        currently not forwarded to ``ax.errorbar``.
    picker: float
        radius in points to be able to pick a point.
    kws: dict
        Additional keyword arguments forwarded to ``ax.errorbar``.

    Returns
    ---------
    errorbar_object: matplotlib.Axes.errorbar
        error bar object containing line data, errorbars, etc.
    """
    # Hollow markers (mfc='None') with edges, error bars and line all
    # sharing the same color and width so the bars read as full lines.
    style = dict(
        marker=marker,
        ms=ms,
        mfc='None',
        mew=lw,
        mec=color,
        ls=ls,
        xerr=x_err,
        yerr=y_err,
        ecolor=color,
        color=color,
        picker=picker,
        lw=lw,
        elinewidth=lw,
        capsize=e_capsize,
    )
    return ax.errorbar(x_ar, y_ar, **style, **kws)
def get_color_palette(RGB_color_palette):
    """
    Convert an RGB color specification into matplotlib color values.

    In the RGB color system each color (red, green and blue) runs on a
    scale from 0 to 255: black is 0,0,0 while white is 255,255,255.
    Matplotlib colors run from 0 to 1, so each component is divided by 255.

    :param RGB_color_palette: RGB specification. Either a single number
        (or numeric string) in the range 0-255, or a string made of the
        channel letters ``R``, ``G``, ``B`` (case-insensitive, any order),
        each optionally followed by its value, e.g. ``'R128B128'``.
    :type RGB_color_palette: str, float or int

    :returns: for a numeric input, the single normalized value as a float;
        otherwise the tuple (R, G, B) of normalized values. A channel that
        is absent is 0; a channel given without a readable value, or with
        a value above 255 (a warning is issued), is set to 1.
    :rtype: float or tuple
    :raises ValueError: if a numeric input is larger than 255.

    :Example:
        >>> from watex.utils.plotutils import get_color_palette
        >>> get_color_palette (RGB_color_palette ='R128B128')
    """
    def ascertain_cp(cp):
        if cp > 255.:
            warnings.warn(
                ' !RGB value is range 0 to 255 pixels , '
                'not beyond !. Your input values is = {0}.'.format(cp))
            raise ValueError('Error color RGBA value ! '
                             'RGB value  provided is = {0}.'
                             ' It is larger than 255 pixels.'.format(cp))
        return cp

    if isinstance(RGB_color_palette, (float, int, str)):
        try:
            single = float(RGB_color_palette)
        except ValueError:
            pass  # not a plain number: parse it as a channel string below
        else:
            return ascertain_cp(single) / 255.

    spec = str(RGB_color_palette).lower()
    # Map each channel letter to the text that follows it, up to the next
    # channel letter. The original split-based parsing never extracted the
    # blue value (it always fell back to 1.0) and depended on letter order.
    found = {}
    for channel, raw in re.findall(r'([rgb])([^rgb]*)', spec):
        found.setdefault(channel, raw)

    rgba = np.zeros((3,))
    for idx, channel in enumerate('rgb'):
        if channel not in found:
            continue
        try:
            rgba[idx] = ascertain_cp(float(found[channel])) / 255.
        except ValueError:
            # missing, unreadable or out-of-range value: saturate the channel
            rgba[idx] = 1.
    return tuple(rgba)
def _get_xticks_formatage(
        ax, xtick_range, space=14, step=7,
        fmt='{}', auto=False, ticks='x', **xlkws):
    """ Skip tick labels at every given number of positions.

    :param ax: matplotlib axes
    :param xtick_range: list of the tick values
    :param space: number of ticks from which label skipping starts.
    :param step: a label is shown only when the tick value is a multiple
        of `step`.
    :param fmt: str, format applied to each label.
    :param ticks: str, default='x', the axis whose labels are formatted.
        Can be ``'y'``.
    :param auto: bool, if ``True``, `space` is set to 10 and `step` is
        derived from the number of ticks.
    :param xlkws: dict, label keyword arguments. ``rotation`` (or
        ``rotate_xlabel``, default 90) sets the label rotation when labels
        are skipped; otherwise the keywords are passed to
        ``set_xticklabels`` / ``set_yticklabels``.
    """
    def format_ticks(ind, x):
        """ Formatter for ``FuncFormatter``: label only every `step`
        value and return ``None`` for the others. """
        if ind % step == 0:
            return fmt.format(ind)
        return None

    if auto:
        space = 10.
        step = int(np.ceil(len(xtick_range) / space))

    rotation = xlkws.get('rotation', xlkws.get('rotate_xlabel', 90))
    on_y = ticks == 'y'

    if len(xtick_range) >= space:
        axis = ax.yaxis if on_y else ax.xaxis
        axis.set_major_formatter(plt.FuncFormatter(format_ticks))
        labels = ax.get_yticklabels() if on_y else ax.get_xticklabels()
        plt.setp(labels, rotation=rotation)
    else:
        # few ticks: label every one of them explicitly
        tlst = [fmt.format(item) for item in xtick_range]
        if on_y:
            ax.set_yticklabels(tlst, **xlkws)
        else:
            ax.set_xticklabels(tlst, **xlkws)


def _set_sns_style(s, /):
    """ Set the seaborn style whether a boolean or a string is given.

    ``True`` and ``None`` (compared through their lower-cased string
    forms) are mapped to ``'darkgrid'``.
    """
    s = str(s).lower()
    s = re.sub(r'true|none', 'darkgrid', s)
    return sns.set_style(s)


def _is_target_in(X, y=None, tname=None):
    """ Retrieve the target as a named series.

    :param X: dataframe
        dataframe containing the data for plotting
    :param y: array or series
        target data for plotting. Note that multitarget output is not
        allowed yet. Moreover, if `y` is given as a dataframe, 'tname' must
        be supplied (and be one of its columns) to retrieve y as a pandas
        series object, otherwise an error is raised.
    :param tname: str,
        target name. If given and `y` is ``None``, `tname` is looked up in
        the `X` columns and popped from `X` when found. If 'tname' does
        not exist, `y` stays ``None``.
    :return: tuple (X, y) where y is a series or ``None``.
    :raises TypeError: if `y` is a dataframe and `tname` is missing or is
        not one of its columns.
    """
    _assert_all_types(X, pd.DataFrame)

    if y is None:
        if tname in X.columns:
            y = X.pop(tname)
        return X, y

    y = _assert_all_types(y, pd.Series, pd.DataFrame, np.ndarray)
    if hasattr(y, 'columns'):
        if tname is None or tname not in y.columns:
            raise TypeError(
                "'tname' must be supplied when y is a dataframe.")
        y = y[tname]
    elif hasattr(y, 'name'):
        # rename the series in place
        y.name = tname or y.name
    elif hasattr(y, '__array__'):
        y = pd.Series(y, name=tname or 'target')

    return X, y


def _toggle_target_in(X, y, pos=None):
    """ Insert the target at the convenient position.

    By default the target is placed last.

    :param X: dataframe
        dataframe containing the data for plotting
    :param y: array or series
        the target for plotting; its ``name`` is used as column name.
    :param pos: int, the position to insert y in the dataframe X.
        By default, `y` is located at the last position. Out-of-bound
        positions are clipped with a warning.

    :return: Dataframe
        Dataframe (modified in place) containing the target 'y'
    """
    n_cols = X.shape[1]
    # keep an explicit 0; any other falsy value means "last position"
    pos = 0 if pos == 0 else (pos or n_cols)
    pos = int(_assert_all_types(pos, int, float))
    ms = ("The positionning of the target is out of the bound."
          "{} position is used instead.")

    if pos > n_cols:
        warnings.warn(ms.format('The last'))
        pos = n_cols
    elif pos < 0:
        warnings.warn(ms.format(
            " Negative index is not allowed. The first"))
        pos = 0

    X.insert(pos, y.name, y)
    return X


def _skip_log10_columns(X, column2skip, pattern=None, inplace=True):
    r""" Compute the log10 of the data except for the columns to skip.

    :param X: dataframe
        pandas dataframe with valid columns
    :param column2skip: list or str,
        List of columns to skip. If given as a string, it is split into
        column names with `pattern`.
    :param pattern: str, default = '[#&*@!,;\s]\s*'
        The base pattern to split the text in `column2skip` into columns.
    :param inplace: bool, default=True
        If ``True``, `X` is modified in place: the remaining columns are
        replaced by their log10 values, the columns listed in
        `column2skip` are dropped from `X`, and ``None`` is returned.
        If ``False``, a copy of `X` is returned in which only the
        remaining columns are replaced by their log10 values.

    :return: ``None`` when working in place, otherwise the new dataframe.
        An unmodified copy of `X` is returned when `column2skip` is empty.
    :raises TypeError: if `column2skip` is not iterable.

    NOTE(review): the columns transformed are those returned by
    ``is_in_if(X.columns, column2skip, return_diff=True)``, presumably the
    columns of `X` that are not skipped; confirm against the helper.

    :example:
       >>> from watex.datasets import load_hlogs
       >>> from watex.utils.plotutils import _skip_log10_columns
       >>> X0, _= load_hlogs (as_frame =True )
       >>> column2skip = ['hole_id','depth_top', 'depth_bottom',
       ...                'strata_name', 'rock_name', 'well_diameter', 'sp']
       >>> X1 = _skip_log10_columns (X0, column2skip, inplace =False)
       >>> # `resistivity` values are now log10 while `sp` is unchanged
       >>> X1['sp'][:3] , X1['resistivity'][:3]
    """
    if not is_iterable(column2skip):
        raise TypeError("Columns to skip expect an iterable object;"
                        f" got {type(column2skip).__name__!r}")

    pattern = pattern or r'[#&*@!,;\s]\s*'
    if isinstance(column2skip, str):
        column2skip = str2columns(column2skip, pattern=pattern)

    if not column2skip:
        return X.copy()

    cskip = copy.deepcopy(column2skip)
    column2skip = is_in_if(X.columns, column2skip, return_diff=True)
    if len(column2skip) == len(X.columns):
        warnings.warn("Value(s) to skip are not detected.")

    if inplace:
        X[column2skip] = np.log10(X[column2skip])
        X.drop(columns=cskip, inplace=True)
        return None

    # copy only when a new frame is actually requested
    X0 = X.copy()
    X0[column2skip] = np.log10(X0[column2skip])
    return X0
def plot_bar(x, y, wh=.8, kind='v', fig_size=(8, 6), savefig=None,
             xlabel=None, ylabel=None, fig_title=None, **bar_kws):
    """
    Make a vertical or horizontal bar plot.

    The bars are positioned at x or y with the given alignment.
    Their dimensions are given by width and height. The horizontal baseline
    is left (default 0) while the vertical baseline is bottom (default=0)

    Many parameters can take either a single value applying to all bars or
    a sequence of values, one for each bar.

    Parameters
    -----------
    x: float or array-like
        The coordinates of the bars: 'x' positions for a vertical bar plot
        (`kind` set to ``v``, the default) or 'y' positions for a
        horizontal bar plot (`kind` set to ``h``).
    y: float or array-like
        The height(s) for vertical and width(s) for horizontal bars.
    wh: float or array-like, default: 0.8
        The width(s) for vertical or height(s) for horizontal bars.
    kind: str, ['vertical', 'horizontal'], default='v'
        The kind of bar plot. ``'v'``/``'vertical'`` or
        ``'h'``/``'horizontal'`` (case-insensitive).
    fig_size: tuple, default=(8, 6)
        Matplotlib figure size.
    savefig: str, optional
        Figure name. When given, the figure is saved with a resolution of
        300 dpi and closed; otherwise it is shown.
    xlabel, ylabel: str, optional
        Axis labels.
    fig_title: str, optional
        Title of the plot.
    bar_kws: dict,
        Additional keywords arguments passed to :
        :func:`~matplotlib.pyplot.bar` or :func:`~matplotlib.pyplot.barh`.

    Raises
    -------
    AssertionError
        If `kind` is not a supported bar kind.
    """
    kind_ = str(kind).lower().strip()
    if kind_ not in ("vertical", 'v', "horizontal", "h"):
        # Raised explicitly (rather than with ``assert``) so the check
        # survives ``python -O``; the exception type is kept for callers
        # that already expect AssertionError. The original message lacked
        # the ``f`` prefix and never showed the offending value.
        raise AssertionError(
            "Support only the horizontal 'h' and vertical 'v' bar plots."
            f" Got {kind!r}")

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=fig_size)
    if kind_ in ("vertical", "v"):
        ax.bar(x, height=y, width=wh, **bar_kws)
    else:
        ax.barh(x, width=y, height=wh, **bar_kws)

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(fig_title)

    if savefig is not None:
        savefigure(fig, savefig, dpi=300)
        plt.close()
    else:
        plt.show()
def plot_profiling(
    erp, /,
    station=None,
    cz=None,
    *,
    style='classic',
    fig_size=(10, 4),
    cz_plot_kws=None,
    marker_kws=None,
    savefig=None,
    ax=None,
    fig=None,
    **plot_kws
):
    """ Visualizes the resistivity profiling of ERP data.

    Function can overlay the selected conductive zone on the ERP
    if `cz` is given.

    Parameters
    -----------
    erp: array_like 1d
        The electrical resistivity profiling array. If a dataframe is
        passed, the `resistivity` column must be included.

        .. versionchanged:: 0.2.1
           Can henceforth accept dataframe that contains resistivity values.

    station: str, int, optional
       Station used to visualize the conductive zone in the `erp` profile.
       Useful if `cz` is not given. When `station='auto'` the conductive
       zone is detected automatically. When `station` is given, the
       conductive zone it defines replaces `cz`.

       .. versionadded:: 0.2.1
          Can henceforth pass the station to plot the conductive zone.

    cz: array_like, optional,
        The selected conductive zone. If ``None``, `cz` is not plotted.

    style: str, default='classic'
        Matplotlib plotting style.
    fig_size: tuple, default= (10, 4)
        Matplotlib figure size.
    marker_kws: dict, default = {'marker':'o', 'c':'#9EB3DD' }
        The dictionary to customize the markers in the plot.
    cz_plot_kws: dict, default = {'ls':'-','c':'#0A4CEE', 'lw': 2 }
        The dictionary to customize the conductive zone in the plot.
    savefig: str, optional
        Save figure name. The default resolution dot-per-inch is ``300``.
        When given, the figure is saved and closed instead of shown.
    ax: Matplotlib.pyplot.Axes, optional
       Axes to draw on. A new figure is created when ``None``.

       .. versionadded:: 0.2.8

    fig: Matplotlib.pyplot.figure, optional
        Figure to save when `ax` is supplied. If ``None``, the figure
        owning `ax` is used.
    plot_kws: dict,
        Additional keyword arguments passed to
        :func:`matplotlib.pyplot.plot` function

    Return
    --------
    ax: Matplotlib.pyplot.Axis
        Return axis

    Raises
    -------
    TypeError
        If `erp` is a dataframe without a `resistivity` column.

    Examples
    ----------
    >>> from watex.datasets import make_erp
    >>> from watex.utils.plotutils import plot_profiling
    >>> d= make_erp (n_stations =56, seed = 42)
    >>> plot_profiling (d.resistivity)
    >>> # read the frame and get the resistivity values
    >>> plot_profiling (d.frame, station ='s07' )
    <AxesSubplot:xlabel='Stations', ylabel='App.resistivity ($\\Omega.m$)'>
    """
    plt.style.use(style)

    if hasattr(erp, 'columns') and hasattr(erp, '__array__'):
        if 'resistivity' not in erp.columns:
            raise TypeError("Missing resistivity column in the data.")
        erp = erp.resistivity

    erp = check_y(erp, input_name="sample of ERP data")

    if station is not None:
        from .coreutils import defineConductiveZone
        auto = str(station).lower().strip() == 'auto'
        if auto:
            station = None
        cz, *_ = defineConductiveZone(erp, station=station, auto=auto)

    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=fig_size)

    positions = np.arange(len(erp))
    leg = []

    zl, = ax.plot(positions, erp,
                  label='Electrical resistivity profiling',
                  **plot_kws)
    marker_kws = marker_kws or dict(marker='o', c='#9EB3DD')
    ax.scatter(positions, erp, **marker_kws)
    leg.append(zl)

    if cz is not None:
        cz = check_y(cz, input_name="Conductive zone 'cz'")
        # hide every sample that does not belong to the conductive zone
        sample_masked = np.ma.array(erp, mask=~np.isin(erp, cz))
        cz_plot_kws = cz_plot_kws or dict(ls='-', c='#0A4CEE', lw=2)
        czl, = ax.plot(positions, sample_masked,
                       label='Conductive zone',
                       **cz_plot_kws)
        leg.append(czl)

    ax.set_xticks(range(len(erp)))
    if len(erp) >= 14:
        # too many stations: label only a subset of the ticks
        ax.xaxis.set_major_formatter(plt.FuncFormatter(_format_ticks))
    else:
        ax.set_xticklabels(
            ['S{:02}'.format(int(i) + 1) for i in range(len(erp))])

    ax.set_xlabel('Stations')
    # raw string: '\O' is not a valid escape sequence
    ax.set_ylabel(r'App.resistivity ($\Omega.m$)')
    ax.legend(handles=leg, loc='best')
    ax.set_xlim([-1, len(erp)])

    if savefig is not None:
        if fig is None:
            # `ax` was supplied without its figure
            fig = ax.get_figure()
        savefigure(fig, savefig, dpi=300)
        plt.close()
    else:
        plt.show()

    return ax
def plot_skew(
    edi_obj,
    method='Bahr',
    mode=None,
    threshold_line=None,
    fig_size=(7, 5),
    savefig=None,
    view=None,
    style=None,
    **kws
):
    r"""
    Plot phase sensitive skew visualization.

    Phase Sensitivity Skew (:math:`\eta`) is a dimensionality tool that
    represents a measure of the skew of the phases of the impedance
    tensor. The parameter is thus unaffected by the distortion effect,
    unlike the Swift-skew and ellipticity dimensionality tools [1]_.

    Values of :math:`\eta` > 0.3 are considered to represent 3D data.
    Phase-sensitive skews less than 0.1 indicate 1D, 2D or distorted
    2D (3-D /2-D) cases. Values of :math:`\eta` between 0.1 and 0.3
    indicate modified 3D/2D structures [2]_ according to the `Bahr`
    method. However, for values :math:`\eta >=0.2` using the `Swift`
    method, the smaller the value :math:`\eta` (close to :math:`0.`),
    the closer the structure is to a 2D structure and vice versa. However,
    it is generally considered that an electrical structure of
    :math:`\eta < 0.4` can be treated as a 2D medium. Hence the
    ``threshold_line`` for :math:`\eta` using the Swift method is drawn
    at `0.4`.

    .. versionadded:: 0.1.5

    Parameters
    -----------
    edi_obj: str, :class:`watex.edi.Edi`
        Full path to edifiles or :class:`~watex.edi.Edi` object.

    method: str, default='Bahr':
        Kind of correction. Can be ``swift`` for the remove distorsion
        proposed by Swift in 1967 [3]_. The value close to 0. assume the
        1D and 2D structures, and 3D otherwise. Conversly to ``bahr`` for
        the remove distorsion proposed by Bahr in 1991 [2]_. The latter
        threshold is set to 0.3. Above this value the structures is 3D.

    threshold_line: bool or str, optional
        Visualize the threshold line. Can be ``'bahr'`` (line at 0.3),
        ``'swift'`` (line at 0.4), or ``'both'`` / ``'*'``. ``True`` draws
        the threshold that matches `method`:

        - Note that when method is set to ``swift``, the value close to
          :math:`0.` assumes the 1D and 2D structures, and 3D otherwise.
        - when method is set to ``Bahr``, :math:`\mu > 0.3` is 3D
          structures, between :math:`[0.1 - 0.3]` assumes modified 3D/2D
          structures whereas :math:`<0.1` 1D, 2D or distorted 2D.

    mode: str, optional
        X-axis coordinates for visualisation. Plot either ``'frequency'``
        or ``'periods'``. The default is ``'frequency'``.

    view: str, default='skew'
        Phase sensitive visualization. Can be rotational invariant
        ``invariant``. Note that setting to ``mu`` or ``invariant`` does
        not change any interpretation since the distortion of Z are all
        rotational invariant whether using the ``Bahr`` or ``swift``
        methods.

    fig_size: tuple, default= (7, 5)
        Matplotlib figure size.

    savefig: str, optional
        Save figure name. The default resolution dot-per-inch is ``300``.
        When given, the figure is saved and closed instead of shown.

    style: str, optional
        Matplotlib plotting style. The current style is left untouched
        when ``None``.

    kws: dict,
        Matplotlib Axes scatterplot additional keywords arguments.

    Return
    --------
    ax: Matplotlib.pyplot.Axis
        Return axis

    See Also
    ---------
    watex.methods.em.Processing.skew:
        Skew equation formulations.
    watex.view.TPlot.plotSkew:
        Give a consistent plot where user can customize the plot using
        the plot parameter of :class:`watex.property.BasePlot` class.

    References
    -----------
    .. [1] Bahr, K. (1988) Interpretation of the magnetotelluric impedance
        tensor: regional induction 395 and local telluric distortion.
        J. Geophys. Res., 62, 119–127.
    .. [2] Bahr, K. (1991) Geological noise in magnetotelluric data: a
        classification of distortion types. 397 Phys. Earth Planet.
        Inter., 66, 24–38.
    .. [3] Bahr, K., 1991. Geological noise in magnetotelluric data: a
        classification of distortion types. Physics of the Earth and
        Planetary Interiors 66 (1–2), 24–38.

    Examples
    ---------
    >>> import watex as wx
    >>> from watex.utils.plotutils import plot_skew
    >>> edi_sk = wx.fetch_data ("edis", return_data =True , samples = 20 )
    >>> plot_skew (edi_sk)
    >>> plot_skew (edi_sk, threshold_line= True)
    """
    if style is not None:
        plt.style.use(style)

    view = str(view or 'skew').lower()
    if any(key in view for key in ('inv', 'rot', 'mu')):
        view = 'mu'

    if 'period' in str(mode).lower():
        mode = 'period'

    # normalize the threshold request to 'bahr', 'swift' or 'both'
    thr = str(threshold_line).lower()
    if thr == 'true':
        thr = str(method).lower()
    if thr in ("*", "both"):
        thr = 'both'

    import watex as wx
    po = wx.EMAP().fit(edi_obj)
    # remove the outliers in the data
    # and filled with NaN
    skew, mu = po.skew(method=method, suppress_outliers=True)
    freqs = 1 / po.freqs_ if mode == 'period' else po.freqs_
    ymat = skew if view == 'skew' else mu

    fig, ax = plt.subplots(1, 1, figsize=fig_size)

    for i in range(skew.shape[1]):
        ax.scatter(freqs, reshape(ymat[:, i]), **kws)

    # ---manage threshold line ------
    # threshold level and line color per method
    thr_lines = {"bahr": (0.3, "k"), "swift": (0.4, "r")}
    thr_code = {"bahr": ["bahr"], "swift": ["swift"],
                "both": ["bahr", "swift"]}
    ct = thr_code.get(thr)
    if ct:
        for name in ct:
            level, color = thr_lines[name]
            ax.axhline(y=level, color=color, linestyle="-",
                       label=rf'threshold: $\mu={level}$')
        ax.legend()

    ax.set_xscale('log')
    ax.set_xlabel('Period ($s$)' if mode == 'period'
                  else 'Frequency ($H_z$)')
    # raw strings: '\m' is not a valid escape sequence
    ax.set_ylabel(('Skew' if view == 'skew' else 'Rot.Invariant')
                  + r"($\mu$)")
    ax.set_xlim([freqs.min(), freqs.max()])

    if savefig is not None:
        savefigure(fig, savefig, dpi=300)
        plt.close()
    else:
        plt.show()

    return ax
def _format_ticks(value, tick_number, fmt='S{:02}', nskip=7):
    """ Tick formatter meant to be wrapped in ``FuncFormatter``, e.g.
    ``ax.xaxis.set_major_formatter(plt.FuncFormatter(_format_ticks))``,
    rather than using
    ``ax.xaxis.set_major_locator(plt.MaxNLocator(3))``.

    :param value: tick value to format
    :param tick_number: position of the tick; unused here but required by
        the ``FuncFormatter`` call signature
    :param fmt: str, default='S{:02}', kind of tick formatage
    :param nskip: int, default =7, only the values that are multiples of
        `nskip` get a label
    :return: the formatted label built from ``int(value) + 1``, or
        ``None`` for the skipped ticks.
    """
    if value % nskip != 0:
        return None
    return fmt.format(int(value) + 1)

#XXX OPTIMIZE
def plot_confidence(
    data=None,
    *,
    y=None,
    x=None,
    ci=.95,
    kind='line',
    b_samples=1000,
    **sns_kws
):
    """ Plot confidence data

    Confidence Interval (CI) is a type of estimate computed from the
    statistics of the observed data which gives a range of values that's
    likely to contain a population parameter with a particular level of
    confidence.

    CI as a concept was put forth by Jerzy Neyman in a paper published in
    1937. There are various types of the confidence interval, some of the
    most commonly used ones are: CI for mean, CI for the median, CI for
    the difference between means, CI for a proportion and CI for the
    difference in proportions.

    Parameters
    ------------
    data: pandas.DataFrame, numpy.ndarray, mapping, or sequence
        Input data structure. Either a long-form collection of vectors
        that can be assigned to named variables or a wide-form dataset
        that will be internally reshaped.
    x, y: vectors or keys in data
        Variables that specify positions on the x and y axes. For the
        bootstrap plot, `y` must be the vector of samples.
    ci: float, default=.95
        Confidence value. A value in ]0, 1] is read as a fraction and a
        larger value as a percentage.
    kind: str, default='line'
        kind of confidence interval plot: any name containing ``'lin'``
        uses `sns.lineplot`, any name containing ``'reg'`` uses
        `sns.regplot`; anything else plots the histogram of bootstrapped
        medians of `y` and prints the confidence interval.
    b_samples: int, default=1000
        Number of bootstraps to use for computing the confidence interval.
    sns_kws: dict,
        Keywords arguments passed to the `sns.lineplot` or `sns.regplot`

    Returns
    ----------
    ax: matplotlib.axes.Axes
        The matplotlib axes containing the plot.

    Raises
    -------
    ValueError
        If `y` is missing or empty when bootstrapping.
    """
    kind = str(kind).lower()
    is_fraction = (isinstance(ci, (int, float))
                   and not isinstance(ci, bool)
                   and 0 < ci <= 1)

    if 'lin' in kind or 'reg' in kind:
        # seaborn expects the interval size as a percentage in [0, 100]
        sns_ci = ci * 100 if is_fraction else ci
        plotter = sns.lineplot if 'lin' in kind else sns.regplot
        return plotter(data=data, x=x, y=y, ci=sns_ci, **sns_kws)

    # `not y` is ambiguous (raises) for arrays and series
    if y is None or len(y) == 0:
        raise ValueError("y should not be None when using the boostrapping"
                         " for plotting the confidence interval.")

    b_samples = int(_assert_all_types(
        b_samples, int, float, objname="Bootstrap samples `b_samples`"))

    # `resample` lives in sklearn.utils, not in sklearn.metrics
    from sklearn.utils import resample

    n_size = len(y)
    # run bootstrap: median of each resampled set
    medians = [np.median(resample(y, n_samples=n_size))
               for _ in range(b_samples)]

    # plot scores
    ax = plt.gca()
    ax.hist(medians)
    plt.show()

    # confidence intervals
    level = ci if is_fraction else ci / 100.
    lower = np.percentile(medians, ((1.0 - level) / 2.0) * 100)
    upper = np.percentile(medians, (level + ((1.0 - level) / 2.0)) * 100)
    print(f"\n{level*100} confidence interval {lower} and {upper}")

    return ax
def plot_confidence_ellipse(x, y):
    """ Plot a confidence ellipse of a two-dimensional dataset

    This function plots the 1, 2 and 3 standard deviation confidence
    ellipses of the covariance of the given array-like variables x and y
    on a new figure, together with the data points and their mean.

    The approach that is used to obtain the correct geometry is explained
    and proved here:

        https://carstenschelp.github.io/2018/09/14/Plot_Confidence_Ellipse_001.html

    The method avoids the use of an iterative eigen decomposition
    algorithm and makes use of the fact that a normalized covariance
    matrix (composed of pearson correlation coefficients and ones) is
    particularly easy to handle.

    Parameters
    -----------
    x, y : array-like, shape (n, )
        Input data.

    Returns
    --------
    ax: matplotlib.axes.Axes
        The axes containing the plot.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    fig, ax_nstd = plt.subplots(figsize=(6, 6))

    # The ellipses are centered on the sample mean, so mark that point
    # rather than the origin.
    mu = np.mean(x), np.mean(y)

    ax_nstd.axvline(c='grey', lw=1)
    ax_nstd.axhline(c='grey', lw=1)

    ax_nstd.scatter(x, y, s=0.5)

    # (number of standard deviations, edge color, extra patch style)
    ellipses = (
        (1, 'firebrick', {}),
        (2, 'fuchsia', {'linestyle': '--'}),
        (3, 'blue', {'linestyle': ':'}),
    )
    for n_std, edgecolor, extra in ellipses:
        confidence_ellipse(x, y, ax_nstd, n_std=n_std,
                           label=rf'${n_std}\sigma$',
                           edgecolor=edgecolor, **extra)

    ax_nstd.scatter(mu[0], mu[1], c='red', s=3)
    ax_nstd.set_title('Different standard deviations')
    ax_nstd.legend()
    plt.show()

    return ax_nstd
def confidence_ellipse(
    x, y,
    ax,
    n_std=3.0,
    facecolor='none',
    **kwargs
):
    """
    Create a plot of the covariance confidence ellipse of *x* and *y*.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data.

    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.

    n_std : float
        The number of standard deviations to determine the ellipse's
        radiuses.

    facecolor : str, default='none'
        Fill color of the ellipse.

    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`

    Returns
    -------
    matplotlib.patches.Ellipse

    Raises
    ------
    ValueError
        If `x` and `y` do not have the same size.
    """
    # accept any array-like (lists, tuples, series), as documented
    x = np.asarray(x)
    y = np.asarray(y)
    if x.size != y.size:
        raise ValueError("x and y must be the same size")

    cov = np.cov(x, y)
    pearson = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])
    # Using a special case to obtain the eigenvalues of this
    # two-dimensional dataset.
    ell_radius_x = np.sqrt(1 + pearson)
    ell_radius_y = np.sqrt(1 - pearson)
    ellipse = Ellipse((0, 0), width=ell_radius_x * 2,
                      height=ell_radius_y * 2,
                      facecolor=facecolor, **kwargs)

    # Calculating the standard deviation of x from
    # the squareroot of the variance and multiplying
    # with the given number of standard deviations.
    scale_x = np.sqrt(cov[0, 0]) * n_std
    mean_x = np.mean(x)

    # calculating the standard deviation of y ...
    scale_y = np.sqrt(cov[1, 1]) * n_std
    mean_y = np.mean(y)

    # unit ellipse -> rotate by 45 degrees -> scale -> move to the mean
    transf = transforms.Affine2D() \
        .rotate_deg(45) \
        .scale(scale_x, scale_y) \
        .translate(mean_x, mean_y)

    ellipse.set_transform(transf + ax.transData)
    return ax.add_patch(ellipse)
def plot_strike(
    list_of_edis, /,
    kind=2,
    period_tolerance=.05,
    text_pad=1.65,
    rot_z=0.,
    **kws
):
    """
    Plot the strike estimated from the invariants and phase tensor
    in a rose diagram or xy plot.

    Parameters
    ------------
    list_of_edis: list, or str
        Full paths to the .edi files to plot. Can also be the path to a
        single :term:`EDI` file or to a directory, in which case every
        ``.edi`` file of that directory is used.

        .. versionchanged:: 0.2.0
           No need to provide a list of :term:`EDI` files. Henceforth
           `list_of_edis` accepts the EDI path-like object of a single EDI
           file then asserts the validity of the EDI files afterward.

    kind: int, default=2
        Can be [ 1 | 2 ] where:

        - *1* to plot individual decades in one plot
        - *2* to plot all period ranges into one polar diagram for each
          strike angle estimation

    fig_num: int, default=1,
        figure number to be plotted. *Default* is 1

    font_size: float, default=10,
        Font size.

    rot_z: float, default=0.,
        angle of rotation clockwise positive.

    period_tolerance: float, default=.05
        Tolerance level to match periods from different edi files.
        *Default* is 0.05

    text_pad: float, default=1.65
        padding of the angle label at the bottom of each polar diagram.
        *Default* is 1.65

    plot_range: str, tuple
        The period range to estimate the strike angle. It can be
        [ 'data' | (period_min,period_max) ]. Options are:

        * *'data'* for estimating the strike for all periods in the data.
        * (pmin,pmax) for period min and period max, input as
          (log10(pmin),log10(pmax))

    plot_tipper: [ True | False ]
        - True to plot the tipper strike
        - False to not plot tipper strike

    pt_error_floor: int, optional
        Maximum error in degrees that is allowed to estimate strike.
        *Default* is None allowing all estimates to be used.

    fold: [ True | False ]
        * True to plot only from 0 to 180
        * False to plot from 0 to 360

    plot_orthogonal: [ True | False]
        * True to plot the orthogonal strike directions
        * False to not

    color: [ True | False ]
        * True to plot shade colors
        * False to plot all in one color

    color_inv: str,
        color of invariants plots

    color_pt: str,
        color of phase tensor plots

    color_tip: str
        color of tipper plots

    ring_spacing: float, optional
        spacing of rings in polar plots

    ring_limits: tuple of int,
        plot limits (min count, max count); set each plot to have these
        limits

    plot_orientation: str, [ 'h' | 'v' ]
        horizontal or vertical plots

    See More
    --------
    Plots the strike angle as determined by invariants of the impedance
    tensor (Weaver et al. [2003] [1]_) and phase tensor azimuth
    (Caldwell et al. [2004] [2]_)

    The data is split into decades where the histogram for each is plotted
    in the form of a rose diagram with a range of 0 to 180 degrees, where
    0 is North and 90 is East. The median angle of the period band is set
    in the polar diagram. The top row is the strike estimated from the
    invariants of the impedance tensor. The bottom row is the azimuth
    estimated from the phase tensor. If tipper is 'y' then the 3rd row is
    the strike determined from the tipper, which is orthogonal to the
    induction arrow direction.

    References
    ----------
    .. [1] Weaver J.T, Lilley F.E.M.(2003) Invariants of rotation of axes
        and indicators of dimensionality in magnetotellurics, Australian
        National University, University of Victoria;
        http://bib.gfz-potsdam.de/emtf/2007/pdf/lilley.pdf
    .. [2] T. Grant Caldwell, Hugh M. Bibby, Colin Brown, The
        magnetotelluric phase tensor, Geophysical Journal International,
        Volume 158, Issue 2, August 2004, Pages 457–469,
        https://doi.org/10.1111/j.1365-246X.2004.02281.x

    Examples
    ----------
    >>> import os
    >>> from watex.datasets import fetch_data
    >>> from watex.utils.plotutils import plot_strike
    >>> from watex.datasets._io import get_data # get edidata in cache
    >>> fetch_data ( 'huayuan', samples = 25 ) # store edi in cache
    >>> # get the edi in cache and plotStrike
    >>> edi_fn_lst = [os.path.join(get_data(),ff) for ff in os.listdir(get_data())
    ...               if ff.endswith('.edi')]
    >>> plot_strike(edi_fn_lst )
    """
    extra = ("PlotStrike uses 'mtpy' or 'pycsamt' as dependency.")
    import_optional_dependency('mtpy', extra=extra)
    from mtpy.imaging.plotstrike import PlotStrike
    from ..property import IsEdi

    if isinstance(list_of_edis, str):
        if os.path.isdir(list_of_edis):
            # keep the directory in each path; the bare file names are
            # only valid relative to the current working directory
            edi_dir = list_of_edis
            list_of_edis = [
                os.path.join(edi_dir, f) for f in os.listdir(edi_dir)
                if str(f).lower().endswith('.edi')]
        else:
            # a single path; its validity is checked just below
            list_of_edis = [list_of_edis]

    # now check whether each file is a valid EDI
    for edi_file in list_of_edis:
        IsEdi._assert_edi(edi_file)

    # suppress third party verbosity
    with nullify_output():
        PlotStrike(
            fn_list=list_of_edis,
            plot_type=kind,
            period_tolerance=period_tolerance,
            text_pad=text_pad,
            rot_z=rot_z,
            **kws
        )
def plot_text(
    x, y,
    text=None,
    data=None,
    coerce=False,
    basename='S',
    fig_size=(7, 7),
    show_line=False,
    step=None,
    xlabel='',
    ylabel='',
    color='k',
    mcolor='k',
    lcolor=None,
    show_leg=False,
    linelabel='',
    markerlabel='',
    ax=None,
    **text_kws
):
    """ Plot text(s) indicating each position in the line.

    Parameters
    -----------
    x, y: str, float, Array-like
        The positions where the text is placed, in data coordinates.
        Column names are accepted when `data` is supplied.
    text: str or list of str, optional
        The text(s) to write, one per position. The object passed by
        the caller is never modified.
    data: pd.DataFrame, optional
        Data containing x and y names. Need to be supplied when x and y
        are given as string names.
    coerce: bool, default=False
        Force the plot even if the number of texts does not match the
        number of positions. Missing texts are generated from
        `basename` followed by the zero-padded position index. If
        ``False``, the number of texts must equal the number of
        positions, otherwise an error raises.
    basename: str, default='S'
        The prefix of the generated texts when `coerce` is ``True``.
    fig_size: tuple, default=(7, 7)
        Matplotlib figure size. Used only when `ax` is ``None``.
    show_line: bool, default=False
        Display the line joining the x, y positions.
    step: int, optional
        Keep the text of every `step`-th position only; the other
        positions get an empty text and no marker.
    xlabel, ylabel: str, optional
        The labels of x and y axes. They are replaced by the name of
        `x`/`y` when these are passed as strings or named series.
    color: str, default='k'
        Text color.
    mcolor: str, default='k'
        Marker color.
    lcolor: str, optional
        Line color if `show_line` is set to ``True``.
    show_leg: bool, default=False
        Display the legend of line and marker labels.
    linelabel, markerlabel: str, optional
        The legend labels of the line and of the markers.
    ax: Matplotlib.Axes, optional
        Axes to draw into. A new figure is created when ``None``.

        .. versionadded:: 0.2.5

    text_kws: dict,
        Keyword arguments passed to :meth:`matplotlib.axes.Axes.text`.

    Return
    -------
    ax: Matplotlib axes

    Raises
    ------
    TypeError
        If both `x` and `y` are ``None``, or if `text` is ``None``
        while `coerce` is ``False``.
    ValueError
        If the number of texts differs from the number of positions
        while `coerce` is ``False``.

    Examples
    --------
    >>> import watex as wx
    >>> data =wx.make_erp (as_frame =True, n_stations= 7 )
    >>> x , y =[ 0, 1, 3 ], [2, 3, 6]
    >>> texto = ['AMT-E1147', 'AMT-E1148', 'AMT-E180']
    >>> plot_text (x, y , text = texto)# no need to set coerce, same length
    >>> data =wx.make_erp (as_frame =True, n_stations= 20 )
    >>> x , y = data.easting, data.northing
    >>> text1 = ['AMT-E1147', 'AMT-E1148', 'AMT-E180']
    >>> plot_text (x, y , coerce =True , text = text1 , show_leg= True,
    ...            show_line=True, linelabel='E1-line', markerlabel= 'Site',
    ...            basename ='AMT-E0' )
    """
    # A column name or a named series supplies the axis label; this
    # takes precedence over an explicitly passed `xlabel`/`ylabel`.
    if isinstance(x, str) or hasattr(x, 'name'):
        xlabel = x if isinstance(x, str) else x.name
    if isinstance(y, str) or hasattr(y, 'name'):
        ylabel = y if isinstance(y, str) else y.name

    if x is None and y is None:
        raise TypeError("x and y are needed for text plot. NoneType"
                        " cannot be plotted.")
    x, y = assert_xy_in(x, y, data=data)

    if text is None and not coerce:
        raise TypeError("Text cannot be plotted. To force plotting text with"
                        " the basename, set ``coerce=True``.")

    # Work on a private list: the caller's object must not be extended
    # or blanked in place, and tuples/arrays must be accepted too.
    if text is None:
        labels = []
    else:
        labels = list(
            is_iterable(text, exclude_string=True, transform=True))

    if len(labels) != len(y) and not coerce:
        raise ValueError("In principle text array and x/y must be consistent."
                         f" Got {len(labels)} and {len(y)}. To plot anyway,"
                         " set ``coerce=True``.")
    if coerce:
        # Generate only the missing labels, numbered by position.
        basename = str(basename)
        labels += [f'{basename}{i:02}' for i in range(len(labels), len(y))]

    if step is not None:
        step = _assert_all_types(step, float, int, objname='Step')
        labels = [label if ii % step == 0 else ''
                  for ii, label in enumerate(labels)]

    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=fig_size)

    if show_line:
        ax.plot(x, y, label=linelabel, color=lcolor)

    # Write every text, and remember the positions that carry one so the
    # markers can be drawn with a single scatter call.
    marked_x, marked_y = [], []
    for ix, iy, name in zip(x, y, labels):
        ax.text(ix, iy, name, color=color, **text_kws)
        if name != '':
            marked_x.append(ix)
            marked_y.append(iy)

    if marked_x:
        ax.scatter(marked_x, marked_y, marker='o', color=mcolor,
                   label=markerlabel)

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    if show_leg:
        ax.legend()
    return ax
def plot_voronoi(
    X, y, *,
    cluster_centers,
    ax=None,
    show_vertices=False,
    line_colors='k',
    line_width=1.,
    line_alpha=1.,
    fig_size=(7, 7),
    fig_title=''
):
    """Plots the Voronoi diagram of the k-means clusters overlaid with
    the data.

    Parameters
    -----------
    X, y : NDarray, Arraylike 1d
        Data training X and y. Must have the same length. The first two
        columns of X are scattered and colored by y.
    cluster_centers: Arraylike
        Coordinates of the cluster centers used to build the Voronoi
        diagram, e.g. the ``cluster_centers_`` attribute of a fitted
        KMeans estimator.
    ax: Matplotlib.Axes, optional
        Axes to draw into. If `None`, a new figure and axes are created.
    show_vertices : bool, optional
        Add the Voronoi vertices to the plot.
    line_colors : string, optional
        Specifies the line color for polygon boundaries
    line_width : float, optional
        Specifies the line width for polygon boundaries
    line_alpha : float, optional
        Specifies the line alpha for polygon boundaries
    fig_size: tuple, default = (7, 7)
        Size of the figure created when `ax` is `None`.
    fig_title: str, default=''
        Title set on the axes.

    Return
    -------
    ax: Matplotlib.Axes
        Axes to support the figure

    Examples
    ---------
    >>> from sklearn.datasets import make_moons
    >>> from sklearn.cluster import KMeans
    >>> from watex.utils.plotutils import plot_voronoi
    >>> X, y = make_moons(n_samples=2000, noise=0.2)
    >>> km = KMeans (n_init ='auto').fit(X, y )
    >>> plot_voronoi ( X, y , cluster_centers = km.cluster_centers_)
    """
    X, y = check_X_y(X, y, )
    cluster_centers = check_array(cluster_centers)

    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=fig_size)

    from scipy.spatial import Voronoi, voronoi_plot_2d

    # Samples first, so the diagram edges are drawn on top of them.
    first_feature, second_feature = X[:, 0], X[:, 1]
    ax.scatter(first_feature, second_feature, c=y, cmap='Set1',
               alpha=0.2, label='Voronoi plot')

    diagram_options = {
        'show_vertices': show_vertices,
        'alpha': 0.5,
        'line_colors': line_colors,
        'line_width': line_width,
        'line_alpha': line_alpha,
    }
    voronoi_plot_2d(Voronoi(cluster_centers), ax=ax, **diagram_options)

    ax.set_title(fig_title, fontsize=20)
    return ax
def _make_axe_multiple(n, ncols=3, fig_size=None, fig=None, ax=...):
    """ Make multiple subplot axes from number of objects.

    Parameters
    ----------
    n: int or iterable
        Number of objects to plot; an iterable is replaced by its length.
    ncols: int, default=3
        Number of axes per row.
    fig_size: tuple, optional
        Figure size passed to :func:`matplotlib.pyplot.subplots`.
    fig: Matplotlib figure, optional
        Returned unchanged when `ax` is supplied.
    ax: Matplotlib axes or array of axes, optional
        Existing axes. When it is ``None`` or left to its default
        (``...``), a new grid of axes is created.

    Returns
    -------
    fig, ax: the figure and the axes (or array of axes).
    """
    if is_iterable(n):
        n = len(n)

    # Ceiling division: just enough rows to hold `n` axes, at least one.
    nrows = max(1, -(-n // ncols))

    # Identity checks on purpose: ``ax in (..., None)`` compares with
    # ``==`` and is ambiguous when `ax` is a NumPy array of axes.
    if ax is ... or ax is None:
        fig, ax = plt.subplots(nrows, ncols, figsize=fig_size)
    return fig, ax
def plot_roc_curves(
    clfs, /,
    X, y,
    names=...,
    colors=...,
    ncols=3,
    score=False,
    kind="inone",
    ax=None,
    fig_size=(7, 7),
    **roc_kws
):
    """ Quick plot of Receiving Operating Characteristic (ROC) of fitted
    models

    Parameters
    ------------
    clfs: list,
        list of fitted models for ROC evaluation. Each model must
        expose ``decision_function`` or ``predict_proba``.

    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Instances passed to the models to compute the scores.

    y : ndarray or Series of length (n_samples, )
        An array or series of target or class values. Preferably, the
        array represent the test class labels data for error evaluation.

    names: list,
        List of model names. If not given, a raw name of the model is
        passed instead.

    colors : str, list
        Colors to specify each model plot.

    ncols: int, default=3
        Number of plots placed on a row before starting a new one.
        Only used when each ROC curve is plotted separately
        (see `kind`).

    score: bool, default=False
        Append the Area Under the curve score to the legend.

        .. versionadded:: 0.2.4

    kind: str, default='inone'
        If ``'individual'``, ``'2'`` or ``'single'``, plot each ROC
        model separately. Any other value groups the ROC curves into a
        single plot.

        .. versionchanged:: 0.2.5
           Parameter `all` is deprecated and replaced by `kind`.

    ax: Matplotlib axes or array of axes, optional
        Axes to draw into. Created when ``None``.

    fig_size: tuple, default=(7, 7)
        Size of the figure created when `ax` is ``None``.

    roc_kws: dict,
        keyword arguments of :func:`sklearn.metrics.roc_curve`

    Return
    -------
    ax: Matplotlib axes, or array of axes when the curves are plotted
        separately.

    Examples
    --------
    >>> from watex.utils.plotutils import plot_roc_curves
    >>> from sklearn.datasets import make_moons
    >>> from watex.exlib import ( train_test_split, KNeighborsClassifier, SVC ,
    ...     XGBClassifier, LogisticRegression )
    >>> X, y = make_moons (n_samples=2000, noise=0.2)
    >>> X, Xt, y, yt = train_test_split (X, y, test_size=0.2)
    >>> clfs = [ m().fit(X, y) for m in ( KNeighborsClassifier, SVC ,
    ...                                  XGBClassifier, LogisticRegression)]
    >>> plot_roc_curves(clfs, Xt, yt)
    >>> plot_roc_curves(clfs, Xt, yt,kind='2', ncols = 4 , fig_size = (10, 4))
    """
    # Exact membership: a substring test against 'individual2single'
    # would also match '', 'in', 'l', ...
    individual = str(kind).lower().strip() in ('individual', '2', 'single')

    def _roc_points(model):
        """Return (fpr, tpr, auc) of `model`; auc is None unless `score`."""
        if hasattr(model, "decision_function"):
            predictions = model.decision_function(X)
        else:
            # probability of the positive class
            predictions = model.predict_proba(X)[:, 1]
        fpr, tpr, _ = roc_curve(y, predictions, **roc_kws)
        auc = roc_auc_score(y, predictions) if score else None
        return fpr, tpr, auc

    def _decorate(axe):
        """Add the chance diagonal (AUC=.5), axis labels and legend."""
        axe.plot([0, 1], [0, 1], 'k--')
        axe.set_xlabel("False Positive Rate (FPR)")
        axe.set_ylabel("True Positive Rate (TPR)")
        axe.legend()

    # a single estimator is wrapped into a list
    clfs = is_iterable(clfs, exclude_string=True, transform=True)

    colors = make_plot_colors(clfs, colors=colors)
    names = make_obj_consistent_if(
        names, [get_estimator_name(m) for m in clfs])

    if individual:
        _, ax = _make_axe_multiple(
            clfs, ncols=ncols, ax=ax, fig_size=fig_size)
    elif ax is None:
        _, ax = plt.subplots(1, 1, figsize=fig_size)

    for k, (model, name) in enumerate(zip(clfs, names)):
        check_is_fitted(model)
        fpr, tpr, auc = _roc_points(model)

        # An array of axes is walked row by row; a single axes is shared.
        axe = np.ravel(ax)[k] if hasattr(ax, '__len__') else ax

        label = name if auc is None else f"{name} (AUC={round(auc, 3)})"
        axe.plot(fpr, tpr, label=label, color=colors[k])

        if individual:
            _decorate(axe)

    if not individual:
        _decorate(ax)

    return ax
def plot_tensors(
    z_or_edis_obj_list, /,
    station: int | str = 'S00',
    zplot: bool = False,
    **kwargs
) -> object:
    """Plot resistivity and phase tensors or the real and imaginary
    impedance.

    The four components (xx, xy, yx, yy) are drawn in a 2 x 4 grid: the
    top row holds the apparent resistivity (or ``Re[Z]``), the bottom
    row the phase (or ``Im[Z]``).

    Parameters
    ------------
    z_or_edis_obj_list: list of :class:`watex.edi.Edi` or \
        :class:`watex.externals.z.Z`
        A collection of EDI- or Impedances tensors objects.
    station: int or str, default='S00'
        Station to visualize. The first number found in the value is
        used as the station index; counting starts from ``0``.
    zplot: bool, default=False,
        Visualize the scaled impedance tensors values `Z` instead of
        the resistivity and phase.
    kwargs: Additional keywords arguments
        Plot controls, listed below with their default values.

    ======================== ==================================================
    Attributes               Description
    ======================== ==================================================
    color_mode               [ 'color' | 'bw' ] color or black and white
                             plots (*default* is 'color')
    cted                     color for data Z_XX and Z_XY components
    ctmd                     color for data Z_YX and Z_YY components
    mted                     marker for data Z_XX and Z_XY components
                             (*default* is 's')
    mtmd                     marker for data Z_YX and Z_YY components
                             (*default* is 'o')
    ms                       size of markers (*default* is 1.5)
    lw                       line width of data curves (*default* is .5)
    e_capsize                cap size of error bars (*default* is 2)
    e_capthick               cap thickness of error bars (*default* is .5)
    dpi                      resolution of figure in dots-per-inch
                             (*default* is 300)
    fig_size                 size of figure in inches (*default* is [6, 6])
    font_size                size of font for tick labels, axes labels are
                             font_size+2 (*default* is 6). Note that it is
                             written to ``plt.rcParams['font.size']``.
    ylabel_pad               padding of the y tick labels (*default* is 1.25)
    period_limits            (min, max) of the period axis; estimated from
                             the first and last periods when omitted
    phase_limits_d           (min, max) of the phase axes; not applied when
                             `zplot` is ``True``
    res_limits_d             (min, max) of the xx and yy resistivity axes;
                             rounded data decades when omitted
    res_limits_od            (min, max) of the xy and yx resistivity axes;
                             rounded data decades when omitted
    subplot_bottom           space between axes and bottom of figure
    subplot_hspace           space between subplots in vertical direction
    subplot_left             space between axes and left of figure
    subplot_right            space between axes and right of figure
    subplot_top              space between axes and top of figure
    subplot_wspace           space between subplots in horizontal direction
    ms_r, lw_r, plot_style,  accepted for backward compatibility and
    ctem, ctmm, mtem, mtmm   ignored: no model response is drawn
    ======================== ==================================================

    Returns
    -------
    z_obj: the impedance tensor object of the plotted station.

    Raises
    ------
    TypeError
        If `station` does not include a position number.
    ValueError
        If the station index is out of range or if `color_mode` is
        neither ``'color'`` nor ``'bw'``.

    Examples
    ---------
    >>> import watex as wx
    >>> edi_data = wx.fetch_data ('edis', samples= 17 , return_data =True )
    >>> wx.utils.plotutils.plot_tensors ( edi_data, station =4 )
    """
    # ---------------------------------------
    # Get station index.
    station_match = re.search(r'\d+', str(station))
    if station_match is None:
        raise TypeError("Station should be or include a position number.")
    station = int(station_match.group())

    obj_type = _assert_z_or_edi_objs(z_or_edis_obj_list)
    # Assert station index to be in the range of EDIlist
    if station >= len(z_or_edis_obj_list):
        raise ValueError(f"Expect {len(z_or_edis_obj_list)} stations."
                         f" Got {station}.")
    # Get z object.
    if obj_type == 'EDI':
        z_obj = z_or_edis_obj_list[station].Z
    else:
        z_obj = z_or_edis_obj_list[station]

    # plain boolean so that every branch below agrees on its meaning
    zplot = bool(zplot)

    # -------------------------------------------
    # Plot attributes
    ms = kwargs.pop('ms', 1.5)
    lw = kwargs.pop('lw', .5)
    e_capthick = kwargs.pop('e_capthick', .5)
    e_capsize = kwargs.pop('e_capsize', 2)

    color_mode = kwargs.pop('color_mode', 'color')
    if color_mode == 'color':
        default_cted, default_ctmd = (0, 0, 1), (1, 0, 0)
    elif color_mode == 'bw':
        default_cted, default_ctmd = (0, 0, 0), (0, 0, 0)
    else:
        raise ValueError("color_mode should be 'color' or 'bw'."
                         f" Got {color_mode!r}.")
    cted = kwargs.pop('cted', default_cted)
    ctmd = kwargs.pop('ctmd', default_ctmd)
    mted = kwargs.pop('mted', 's')
    mtmd = kwargs.pop('mtmd', 'o')

    # Model-response styling is accepted for backward compatibility but
    # unused: this function draws the data only.
    for unused_key in ('ms_r', 'lw_r', 'plot_style',
                       'ctem', 'ctmm', 'mtem', 'mtmm'):
        kwargs.pop(unused_key, None)

    phase_limits_d = kwargs.pop('phase_limits_d', None)
    res_limits_d = kwargs.pop('res_limits_d', None)
    res_limits_od = kwargs.pop('res_limits_od', None)
    period_limits = kwargs.pop('period_limits', None)

    subplot_wspace = kwargs.pop('subplot_wspace', .3)
    subplot_hspace = kwargs.pop('subplot_hspace', .0)
    subplot_right = kwargs.pop('subplot_right', .98)
    subplot_left = kwargs.pop('subplot_left', .08)
    subplot_top = kwargs.pop('subplot_top', .85)
    subplot_bottom = kwargs.pop('subplot_bottom', .1)

    fig_size = kwargs.pop('fig_size', [6, 6])
    fig_dpi = kwargs.pop('dpi', 300)
    ylabel_pad = kwargs.pop('ylabel_pad', 1.25)

    # --> set default font size (NOTE: global matplotlib setting)
    font_size = kwargs.pop('font_size', 6)
    plt.rcParams['font.size'] = font_size
    fontdict = {'size': font_size + 2, 'weight': 'bold'}

    legend_loc = 'upper center'
    legend_pos = (.5, 1.18)
    legend_marker_scale = 1
    legend_border_axes_pad = .01
    legend_label_spacing = 0.07
    legend_handle_text_pad = .2
    legend_border_pad = .15

    gs = gridspec.GridSpec(2, 4,
                           wspace=subplot_wspace,
                           left=subplot_left,
                           top=subplot_top,
                           bottom=subplot_bottom,
                           right=subplot_right,
                           hspace=subplot_hspace,
                           height_ratios=[1.5, 1])

    # ------------------------------------------
    # Create the figure and the 2 x 4 axes
    fig = plt.figure(station, figsize=fig_size, dpi=fig_dpi)
    plt.clf()
    fig.suptitle("Station {}".format(str(station)), fontdict=fontdict)

    axrxx = fig.add_subplot(gs[0, 0])
    axrxy = fig.add_subplot(gs[0, 1], sharex=axrxx)
    axryx = fig.add_subplot(gs[0, 2], sharex=axrxx, sharey=axrxy)
    axryy = fig.add_subplot(gs[0, 3], sharex=axrxx, sharey=axrxx)

    axpxx = fig.add_subplot(gs[1, 0])
    axpxy = fig.add_subplot(gs[1, 1], sharex=axrxx)
    axpyx = fig.add_subplot(gs[1, 2], sharex=axrxx)
    axpyy = fig.add_subplot(gs[1, 3], sharex=axrxx)

    # convert to apparent resistivity and phase
    z_obj.compute_resistivity_phase()
    period = 1 / z_obj._freq

    # find locations where points have been masked (zero impedance)
    nzxx = np.nonzero(z_obj.z[:, 0, 0])[0]
    nzxy = np.nonzero(z_obj.z[:, 0, 1])[0]
    nzyx = np.nonzero(z_obj.z[:, 1, 0])[0]
    nzyy = np.nonzero(z_obj.z[:, 1, 1])[0]

    if zplot:
        # scale the impedance by 1/sqrt(f), broadcast on the components
        scaling = (1. / np.sqrt(np.asarray(z_obj.freq))).reshape(-1, 1, 1)
        plot_res = abs(z_obj.z.real * scaling)
        plot_res_err = abs(z_obj.z_err * scaling)
        plot_phase = abs(z_obj.z.imag * scaling)
        plot_phase_err = abs(z_obj.z_err * scaling)
    else:
        plot_res = z_obj.resistivity
        plot_res_err = z_obj.resistivity_err
        plot_phase = z_obj.phase
        plot_phase_err = z_obj.phase_err

    def _decade_limits(*components):
        """(min, max) rounded outwards to decades; None if a component
        has no valid point."""
        try:
            lowest = min(comp.min() for comp in components)
            highest = max(comp.max() for comp in components)
        except ValueError:  # min/max of an empty selection
            return None
        return (10 ** (np.floor(np.log10(lowest))),
                10 ** (np.ceil(np.log10(highest))))

    # Limits given by the user are kept; otherwise use data decades.
    if res_limits_d is None:
        res_limits_d = _decade_limits(plot_res[nzxx, 0, 0],
                                      plot_res[nzyy, 1, 1])
    if res_limits_od is None:
        res_limits_od = _decade_limits(plot_res[nzxy, 0, 1],
                                       plot_res[nzyx, 1, 0])

    # --> make key word dictionaries for plotting
    kw_xx = {'color': cted,
             'marker': mted,
             'ms': ms,
             'ls': ':',
             'lw': lw,
             'e_capsize': e_capsize,
             'e_capthick': e_capthick}

    kw_yy = {'color': ctmd,
             'marker': mtmd,
             'ms': ms,
             'ls': ':',
             'lw': lw,
             'e_capsize': e_capsize,
             'e_capthick': e_capthick}

    # ---------plot the apparent resistivity--------------------------------
    # each component in its own subplot
    erxx = plot_errorbar(axrxx, period[nzxx], plot_res[nzxx, 0, 0],
                         plot_res_err[nzxx, 0, 0], **kw_xx)
    erxy = plot_errorbar(axrxy, period[nzxy], plot_res[nzxy, 0, 1],
                         plot_res_err[nzxy, 0, 1], **kw_xx)
    eryx = plot_errorbar(axryx, period[nzyx], plot_res[nzyx, 1, 0],
                         plot_res_err[nzyx, 1, 0], **kw_yy)
    eryy = plot_errorbar(axryy, period[nzyy], plot_res[nzyy, 1, 1],
                         plot_res_err[nzyy, 1, 1], **kw_yy)
    # ---------plot the phase-----------------------------------------------
    plot_errorbar(axpxx, period[nzxx], plot_phase[nzxx, 0, 0],
                  plot_phase_err[nzxx, 0, 0], **kw_xx)
    plot_errorbar(axpxy, period[nzxy], plot_phase[nzxy, 0, 1],
                  plot_phase_err[nzxy, 0, 1], **kw_xx)
    plot_errorbar(axpyx, period[nzyx], plot_phase[nzyx, 1, 0],
                  plot_phase_err[nzyx, 1, 0], **kw_yy)
    plot_errorbar(axpyy, period[nzyy], plot_phase[nzyy, 1, 1],
                  plot_phase_err[nzyy, 1, 1], **kw_yy)

    try:
        # Reaching into the cap and bar artists raises IndexError when a
        # component produced no error bar, i.e. nothing was drawn.
        for er in (erxx, erxy, eryx, eryy):
            _ = (er[1][0], er[1][1], er[2][0])
        line_list = [[erxx[0]], [erxy[0]], [eryx[0]], [eryy[0]]]
    except IndexError:
        warnings.warn('Found no Z components for {0}'.format(station))
        line_list = [[None], [None], [None], [None]]

    # ------------------------------------------
    # make things look nice
    ax_list = [axrxx, axrxy, axryx, axryy,
               axpxx, axpxy, axpyx, axpyy]
    label_list = [['$Z_{xx}$'], ['$Z_{xy}$'],
                  ['$Z_{yx}$'], ['$Z_{yy}$']]

    # set period limits
    if period_limits is None:
        period_limits = (10 ** (np.floor(np.log10(period[0]))) * 1.01,
                         10 ** (np.ceil(np.log10(period[-1]))) * .99)

    # set axis properties
    for aa, ax in enumerate(ax_list):
        top_row = aa < 4
        ax.tick_params(axis='y', pad=ylabel_pad)

        if not top_row:
            ax.set_xlabel('Period (s)', fontdict=fontdict)

        # Resistivity is always on a log axis; with `zplot` both rows
        # are. Setting the scale installs the log tick locator and
        # formatter, so no tick label is edited beforehand.
        if zplot or top_row:
            ax.set_yscale('log')

        if not zplot:
            if top_row:
                limits = res_limits_d if aa in (0, 3) else res_limits_od
                try:
                    ax.set_ylim(limits)
                except (TypeError, ValueError):
                    # best effort: keep autoscaling on unusable limits
                    pass
            elif phase_limits_d is not None:
                ax.set_ylim(phase_limits_d)

        # set axes labels on the first panel of each row
        if aa == 0:
            ax.set_ylabel(
                'Re[Z (mV/km nT)]' if zplot
                else r'App. Res. ($\mathbf{\Omega \cdot m}$)',
                fontdict=fontdict)
        elif aa == 4:
            ax.set_ylabel(
                'Im[Z (mV/km nT)]' if zplot else 'Phase (deg)',
                fontdict=fontdict)

        ax.set_xscale('log')
        ax.set_xlim(period_limits[0], period_limits[1])
        ax.grid(True, alpha=.25)

        if top_row:
            # the period axis is labelled on the bottom row only
            plt.setp(ax.get_xticklabels(), visible=False)

    for aa, ax in enumerate(ax_list[0:4]):
        ax.legend(line_list[aa],
                  label_list[aa],
                  loc=legend_loc,
                  bbox_to_anchor=legend_pos,
                  markerscale=legend_marker_scale,
                  borderaxespad=legend_border_axes_pad,
                  labelspacing=legend_label_spacing,
                  handletextpad=legend_handle_text_pad,
                  borderpad=legend_border_pad,
                  framealpha=1,
                  prop={'size': max([font_size, 5])})

    plt.show()

    return z_obj

#XXX TODO
def plot_rsquared(X, y, y_pred, ):
    """Plot the actual data, the fitted line and the R-squared score.

    Parameters
    ----------
    X: Arraylike 1d
        Predictor values, one per sample.
    y: Arraylike 1d
        Actual target values.
    y_pred: Arraylike 1d
        Predicted target values, in the same order as `X`.

    Returns
    -------
    ax: Matplotlib axes
        The current axes, which hold the plot.
    """
    from sklearn.metrics import r2_score

    # Calculate R-squared
    r_squared = r2_score(y, y_pred)

    ax = plt.gca()
    # Plotting the scatter plot
    ax.scatter(X, y, color='blue', label='Actual data')

    # Plotting the regression line; samples are ordered along the
    # predictor so that the line does not zigzag on unsorted input.
    predictor = np.asarray(X).ravel()
    fitted = np.asarray(y_pred).ravel()
    order = np.argsort(predictor, kind='stable')
    ax.plot(predictor[order], fitted[order], color='red', linewidth=2,
            label='Fitted line')

    # Annotate the R-squared value in axes coordinates so that it stays
    # visible whatever the range of the data.
    ax.text(0.5, 0.95, 'R-squared = {:.2f}'.format(r_squared),
            fontsize=12, ha='center', va='top', transform=ax.transAxes)

    # Adding labels and title
    ax.set_xlabel('Predictor')
    ax.set_ylabel('Target')
    ax.set_title('R-squared Diagram')
    ax.legend()

    # Show the plot
    plt.show()
    return ax
def plot_sounding(
    ves, /,
    style='bmh',
    fig_size=(10, 4),
    cz_plot_kws=None,
    marker_kws=None,
    savefig=None,
    ax=None,
    fig=None,
    **plot_kws
):
    r""" Visualize the vertical electrical sounding.

    Function plots the sounding curve from AB/2 sounding points.

    Parameters
    -----------
    ves: array_like 1d
        The vertical electrical resistivity sounding array. If dataframe
        is passed,`resistivity` column must be included.

    style: str, default='bmh'
        Matplotlib plotting style. Note that it is applied globally
        with :func:`matplotlib.pyplot.style.use`.
    fig_size: tuple, default= (10, 4)
        Matplotlib figure size. Used only when `ax` is ``None``.
    marker_kws: dict, default = {'marker':'o', 'c':'#9EB3DD' }
        The dictionary to customize the markers in the plot.
    cz_plot_kws: dict, optional
        The dictionary meant to customize the conductive zone. It is
        currently not used by the function.
    savefig: str, optional
        Save figure name. The default resolution dot-per-inch is ``300``.
        When given, the figure is saved then closed instead of shown.
    ax: Matplotlib.pyplot.Axes, optional
        Axe to collect the figure.
    fig: Matplotlib.pyplot.figure, optional
        Figure to save. When omitted, the figure owning `ax` is used.
    plot_kws: dict,
        Additional keyword arguments passed to
        :meth:`matplotlib.axes.Axes.semilogy`.

    Return
    --------
    ax: Matplotlib.pyplot.Axis
        Return axis

    Raises
    ------
    TypeError
        If a dataframe without a `resistivity` column is passed.

    See also
    ---------
    watex.utils.exmath.plotOhmicArea:
        plot the Ohmic Area including the computed fracture zone.

    Examples
    ----------
    >>> from watex.datasets import make_ves
    >>> from watex.utils.plotutils import plot_sounding
    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots ( 2, 1, figsize = (10, 10))
    >>> d= make_ves (samples =56, seed = 42)
    >>> plot_sounding (d.resistivity, ax =ax [0], color ='k', marker ='D', )
    >>> ax[0].set_title ("VES: samples=56, seed =42")
    >>> # read the frame and get the resistivity values
    >>> ax[1] = plot_sounding(make_ves (order ='+', max_rho =1e4, seed =65 ,
    ...                       as_frame=True,iorder =5), ax= ax[1], ls=':',
    ...                       marker ='o', color ='blue')
    >>> ax[1].set_title ("VES:samples=41, order='+', iorder=5,"
    ...                  " max_rho=10000.$\Omega.m$, seed=65")
    """
    plt.style.use(style)

    # dataframe-like input: pick the resistivity column
    if hasattr(ves, 'columns') and hasattr(ves, '__array__'):
        if 'resistivity' not in ves.columns:
            raise TypeError("Missing resistivity column in the data.")
        ves = ves.resistivity

    ves = check_y(ves, input_name="sample of VES data")

    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=fig_size)
    elif fig is None:
        # needed for saving when only the axes is supplied
        fig = ax.get_figure()

    positions = np.arange(len(ves))

    # a caller-supplied label takes over the default one
    plot_kws.setdefault('label', 'Vertical Electrical Resistivity')
    curve, = ax.semilogy(positions, ves, **plot_kws)

    marker_kws = marker_kws or dict(marker='o', c='#9EB3DD')
    ax.scatter(positions, ves, **marker_kws)

    ax.set_xticks(range(len(ves)))
    _get_xticks_formatage(ax, ax.get_xticks(), auto=True, rotation=0)

    ax.set_xlabel('AB/2(m)')
    ax.set_ylabel(r'App.resistivity ($\Omega.m$)')
    ax.legend(handles=[curve], loc='best')
    ax.set_xlim([-1, len(ves)])

    if savefig is not None:
        savefigure(fig, savefig, dpi=300)
        plt.close()
    else:
        plt.show()

    return ax
# import watex as wx # lspath =r'C:\Users\Daniel\Desktop\projects\nanshaLS0.csv' # ls_data = wx.read_data (lspath , sanitize =True, sep =';', verbose =True ) # ls_data2 = las_data.copy() ; sd = ls_data2.replace(',', '.') # ls_data2 = ls_data.copy() ; sd = ls_data2.replace(',', '.') # ls_data2 [ls_data2.columns].repalce (',', '.', inplace =True ) # ls_data2 [ls_data2.columns].replace (',', '.', inplace =True ) # ls_data = wx.read_data (lspath, sanitize =True, sep =';', decimal =',') # ls_data = wx.to_numeric_dtypes (ls_data ) # test = ls_data ['latitude'] + ls_data['longitude'] # ls_data.to_csv ( r'C:\Users\Daniel\Desktop\projects\nsh.lsdata.csv', index =False ) # ## ---(Mon Nov 6 12:24:49 2023)--- # import watex as wx # lspath = r'C:\Users\Daniel\Desktop\projects\nsh.lsdata.csv' # ls_data = wx.read_data (lspath , sanitize =True ) # ls_data.shape # data = wx.utils.random_sampling ( ls_data , samples ='30%') # data.shape # test_data = data.copy() # test_xgb = data [['longitude', 'latitude', '2022']] # import numpy as np ; value_r = np.linspace (0.89 , 0.96 , test-data.shape [0]) # import numpy as np ; value_r = np.linspace (0.89 , 0.96 , test_data.shape [0]) # value_xgb = np.random.shuffle ( value_r ) * test_xgb[['2022']] # value_xgb = value_r .copy() # vlue_xgb = test_xgb[['2022']]* value_xgb # value_xgb.shape # vlue_xgb = test_xgb[['2022']]* wx.reshape (value_xgb, 1) # vlue_xgb = test_xgb[['2022']].values * value_xgb # test_data = wx.fetch_data('edis', samples = 15 ).frame