Source code for watex.datasets.gdata

# -*- coding: utf-8 -*-
#   License: BSD-3-Clause
#   Author: LKouadio 

from __future__ import annotations 
import warnings 
import numpy as np 
import pandas as pd 
from ..utils.coreutils import  makeCoords 
from ..utils.funcutils import _assert_all_types,  is_iterable
from ..utils.gistools import ( 
    ll_to_utm, project_points_ll2utm, project_point_utm2ll) 
from ..utils.exmath import fitfunc
from ..utils.box import Boxspace 

__all__ =["make_erp", "make_ves"] 

[docs] def make_erp ( *, n_stations:int= 42, max_rho:float= 1e3 , min_rho:float= 1e0, step:float=20., reflong:float|str='110:29:09.00', reflat:float|str='26:03:05.00' , utm_zone:str='29N', order:str='+', full_coordinates:bool=True, raise_warning:bool=False, as_frame:bool=False, seed:int=None, is_utm:bool=False, epsg: int =None, **coord_kws ): r""" Generate Electrical Resistivity Profiling (ERP) data from stations and coordinates points. To generate samples from specific area, it is better to provide both latitude and longitude values from a single station of this area as arguments passed to parameters `reflat` and `reflong` respectively. Also specify the `utm_zone` for the lat/lon coordinates conversion into UTM if necessary. If not useful, can turn off the parameter `full_coordinates` to ``False``. Parameters ----------- n_stations: int, default=42 number of measurements stations max_rho: float, default=1e3 maximum resistivity value on the survey area in :math:`\Omega.m` min_rho: float, default=1e0 minimum resistivity value on the survey area in :math:`\Omega.m` reflong: float or string or list of [start, stop], default='110:29:09.00' Reference longitude in degree decimal or in DD:MM:SS for the first station considered as the origin of the landmark. reflat: float or string or list of [start, stop], default='26:03:05.00' Reference latitude in degree decimal or in DD:MM:SS for the reference site considered as the landmark origin. If value is given in a list, it can contain the start point and the stop point. step: float or str , default=20 Offset or the distance of seperation between different sites in meters. If the value is given as string type, except the ``km``, it should be considered as a ``m`` value. Only meters and kilometers are accepables. order: str , default='-' Direction of the projection line. By default the projected line is in ascending order i.e. from SW to NE with angle `r` set to ``45`` degrees. Could be ``-`` for descending order. Any other value should be in ascending order. utm_zone: string (##N or ##S), default='29N' utm zone in the form of number and North or South hemisphere, 10S or 03N Must be given if `utm2deg` is set to ``True``. full_coordinates: bool, default=True, Convert latitude and longitude to approximate UTM values. Easting and northing are gotten using the reference ellipsoid =23 with WGS84. If ``False``, easting and northing are not computed and set to null. raise_warning: bool, default=True, Raises warnings if :term:`GDAL` is not set or the coordinates accurately status. as_frame: bool, default=False, if ``True``, outputs the data into as a pandas dataframe, :class:`~watex.utils.box.Boxspace` object otherwise. seed: int, Optional, It allows reproducing the same data. If value is passed, it reproduces the same data at that sample points. is_utm: bool, default=False Type of coordinates passed to `reflat` and reflong` params for generating `longitude-latitude` coordinates. If `is_utm` is explicity set to ``True``, that means values `reflong` and `reflat` arein UTM coordinates. Then the conversion to `longitude-latitude` should be operated. However if `is_utm` is ``False`` when `reflat` and `reflong` values are greater than ``90`` and ``180`` degrees respectively, an errors should raise. .. versionadded:: 0.2.1 epsg: int, str, Optional EPSG number defining projection. See http://spatialreference.org/ref/ for moreinfo. Overrides utm_zone if both are provided coord_kws: dict, Additional keywords passed to :func:`~watex.utils.coreutils.makeCoords`. Returns -------- (pd.Dataframe | :class:`~watex.utils.box.Boxspace` ) Examples ---------- >>> from watex.datasets.gdata import make_erp >>> erp_data = make_erp (n_stations =50 , step =30 , as_frame =True) >>> erp_data.head(3) Out[256]: station longitude latitude easting northing resistivity 0 0 -13.488511 0.000997 668210.580864 110.183287 225.265306 1 30 -13.488511 0.000997 668210.581109 110.183482 327.204082 2 60 -13.488510 0.000997 668210.581355 110.183676 204.877551 """ stations = np.arange (0 , n_stations * step , step ) resistivity =np.abs(np.linspace(min_rho, max_rho , n_stations) ) if seed is not None: np.random.seed(seed) np.random.shuffle(resistivity) rlons, rlats = makeCoords(reflong, reflat, nsites=n_stations, utm_zone =utm_zone, step = step, is_utm= is_utm, raise_warning=raise_warning, epsg = epsg , **coord_kws) easting = np.zeros_like(rlons) ; northing = np.zeros_like(rlats) if full_coordinates: try : # projection is more consistent than using the # the reference ellipsoid. If error occurs then use # the default computation. en = [project_points_ll2utm(lat= lat, lon= lon, utm_zone= utm_zone, epsg=epsg ) for lat, lon in zip (rlats, rlons)] except: en = [ll_to_utm (23 , lat , lon ) for lat, lon in zip (rlats, rlons)] _, easting , northing = zip (*en) d = { "station": stations, "longitude":rlons, "latitude": rlats, "easting": easting, "northing": northing, "resistivity": resistivity } dx ={"frame": pd.DataFrame (d), "data": pd.DataFrame (d).values } data = Boxspace(**d, **dx ) return data.frame if as_frame else data
[docs] def make_ves ( *, samples:int= 31, min_rho:float=1e1, max_rho:float= 1e3, max_depth:float= 100., order:str='-', as_frame:bool=False, seed:int=None, iorder:int=3, xy = None, is_utm=False, add_xy:bool =False, utm_zone=None, epsg: int=None ): r""" Generate Vertical Electrical Sounding (VES) data from pseudo-depth measurements. For a large pseudo-depth measurements, one can change the number of samples to a large values. The default samples presumed collected is ``samples=31`` measurements in deeper. Parameters ----------- samples: int, default=42 number of measurements depth AB/2 in meters. max_rho: float, default=1e3 maximum resistivity value expected in deeeper on the survey area in :math:`\\Omega.m` min_rho: float, default=1e1 minimum resistivity value expected in deeper on the survey area in :math:`\\Omega.m` order: str , default='-' Direction of the projection line. By default the projected line is in ascending order i.e. from SW to NE with angle `r` set to ``45`` degrees. Could be ``-`` for descending order. Any other value should be in ascending order. max_depth: float, default=100 Value of the measurement in deeper expected to reach by AB/2 in meters. as_frame: bool, default=False, if ``True``, outputs the data into as a pandas dataframe, :class:`~watex.utils.box.Boxspace` object otherwise. seed: int, Optional, It allows reproducing the same data. If value is passed, it reproduces the same data at that sample points. iorder: int, default=3 Inflexion order. It is a positive value greater than 0. If ``None``, it should be computed using the length of extrema (local + global). It also might be lower as possible to let the fitting VES curve more realistic. xy: tuple, optional Coordinates point ( easting, northing ) or (lon, lat) corresponding to the :term:`VES` points ``sves``. If coordinates values are not given coordinates are randomly generated into (lon, lat) and stored into the attribute `xy`. To returns the xy auto-coordinates when ``as_frame=True`` set `add_xy` to ``True``. .. versionadded:: 0.2.1 is_utm: bool, default=False In principle, `xy` expects to be in `longitude-latitude` coordinates. However if coordinates are passed into a UTM such as `easting-northing`, user can specify the `utm_zone` to convert the `xy` values into a valid longitude and latitude coordinates. add_xy: bool, default=False Add `xy` coordinates to the :term:`VES` dataframe. utm_zone: str, Optional To generate coordinates `xy` from a specific zone, `utm_zone` can be specified, otherwise ``29N`` is used instead. epsg: int, str, Optional EPSG number defining projection. See http://spatialreference.org/ref/ for moreinfo. Overrides utm_zone if both are provided Returns --------- (pd.Dataframe | :class:`~watex.utils.box.Boxspace` ) Notes ------- when returning the :class:`~watex.utils.box.Boxspace` object, each columns of 'VES' data can be retrieved as an attributes. Check the examples below Examples --------- >>> from watex.datasets.gdata import make_ves >>> b = make_ves (samples =50 , order ='+') # 50 measurements in deeper >>> b.resistivity [:-7] Out[314]: array([429.873 , 434.255 , 438.5707, 442.8203, 447.0042, 451.1228, 457.5775]) >>> b.frame.head(3) Out[315]: AB MN resistivity 0 1.0 0.6 429.872999 1 2.0 0.6 434.255018 2 3.0 0.6 438.570675 >>> ves_data = make_ves (samples =50 , min_rho =10, max_rho =1e5 , as_frame =True, add_xy= True , xy = ( 3143965.855 , 336704.455) , is_utm = True , utm_zone = '49N', epsg =None) >>> ves_data.head(2) Out[316]: AB MN resistivity longitude latitude 0 1.0 0.6 51544.426685 107.901553 -61.802165 1 2.0 0.6 51420.739513 107.901553 -61.802165 """ ix , mnv= [ 4, 16 , 26 , 31 ], [.4 , 1., 5., 10.] abv = [1, 2, 4, 10 ] samples = int (_assert_all_types(samples, int, float, objname="Samples")) if samples < 8: warnings.warn("Expects at least samples be greater than 7." f" Got '{samples}'") samples = 8 ix = np.array ( np.array (ix) * samples / 31 , dtype =int ) mnv = np.array(mnv, dtype =float ) * samples / 31 abv = np.array(abv, dtype =float ) * samples / 31 MN_max = max_depth //10 # MN = _generate_ABMN_samples(indexes= ix, fixed_values=mnv, threshold_multiplicator= MN_max ) AB = _generate_ABMN_samples(indexes = ix , fixed_values=abv, threshold_multiplicator=MN_max, kind ="AB") # make resistivity g = np.linspace ( max_rho, min_rho , samples ) if str(order) =='-' else np.linspace ( min_rho, max_rho, samples ) if seed is not None: np.random.seed(seed ) np.random.shuffle(g) iorder = int ( _assert_all_types(iorder, int, float, objname ="Inflexion order 'iorder'") ) if iorder <=0: raise ValueError ("Inflexion order 'iorder' expects a positive" f" number greater than 0. Got '{iorder}'.") f, *_ = fitfunc (AB, g, deg= iorder ) resistivity = np.abs(f (AB )) d= {"AB":AB, "MN": MN, "resistivity":resistivity} dx ={"frame": pd.DataFrame (d), "data": pd.DataFrame (d).values } # # fetch_random coordinate for sves or manage xy coordinates xy, is_utm = _manage_xy_coordinates( xy , is_utm = is_utm , utm_zone = utm_zone, epsg = epsg ) # now add_xy if add_xy : xname = "easting" if is_utm else 'longitude' yname = "northing" if is_utm else 'latitude' d [xname] = xy if np.isnan (xy).any() else xy [0] d [yname]= xy if np.isnan (xy).any() else xy [-1] # insert longitude and latitude in dataframe dx['frame'][xname] = d [xname] dx['frame'][yname] = d [yname] dx ['xy']= xy data = Boxspace(**d, **dx , utm_zone = utm_zone or '29N') return data.frame if as_frame else data
def _generate_ABMN_samples ( indexes, fixed_values, threshold_multiplicator, kind ='MN'): """ Isolated part for 'VES' data generating. """ # use index MN for test MN=list() nsam = np.array (np.diff (indexes ), dtype = int ) for ii, (ind, v) in enumerate (zip (indexes, fixed_values)): if ii==0: s = [ v * threshold_multiplicator / 10 for i in range ( ind ) ] if kind =="MN" else list( np.linspace (1, indexes [0], indexes [0])) else: s = [v* threshold_multiplicator / 10 for i in range ( indexes[ii -1], indexes[ii])] if kind=="MN" else list( np.arange (0, nsam[ii-1] * v, v ) + s[-1] + fixed_values[ii-1]) MN.extend (s) return np.around (MN, 1 ) def _manage_xy_coordinates ( xy= None, *, is_utm =False, utm_zone = None, epsg=None ): """ Manage the coordinates or generate random coordinates. Isolated part of `make_ves`. """ # fetch_random coordinate for sves if xy is not None: xy_init = xy # make a copy if isinstance ( xy, str): xy = is_iterable (xy, parse_string= True, transform = True ) xy = is_iterable(xy, exclude_string= True, transform = True ) if len(xy )!=2 : # in the case long value is given warnings.warn( "Unexpected xy coordinates. xy should be a tuple of" " (easting, northing) or (longitude, latitude)." f" Got {xy_init}. Can't use it. Set to NaN instead.") xy = np.nan if is_utm and not np.isnan(xy).any() : if ( utm_zone is None and epsg is None ): warnings.warn ("GISError: Need to input either UTM zone or " "EPSG number for an accurate conversion. Use " "default `29N` instead.") # is given into longitude latitude # so reverse it yx = project_point_utm2ll(*xy[::-1] , utm_zone = utm_zone or '29N' , epsg= epsg ) xy = yx [::-1] # reverse back to longitude latitude is_utm=False # conversion is done if xy is None: # collect 7 sites and fetch randomly # one site # turn off warnings about # using the x, y = makeCoords (3143965.855 , 336704.455 , utm_zone= utm_zone or '29N', nsites= 7 , is_utm=True , raise_warning= False ) sves_ix = np.random.choice ( np.arange ( len(x))) xy = ( x [sves_ix], y [sves_ix]) # block to is_utm equals to False # since values are alread be converted to lonlat is_utm =False return xy , is_utm