Source code for pyobistools.validation.check_onland

# Problems with importing the obisqc solution, use pyxylookup directly.
import pyxylookup as xy
import warnings
import geopandas as gpd
import pandas as pd

# alternate idea for shapefiles from Bio Mobilization workshop module 6 example
import cartopy.io.shapereader as shpreader
from shapely.ops import unary_union
from shapely.prepared import prep


[docs] def check_onland(data, land=None, report=False, buffer=0, offline=False): """ Check which points are likely to be located on land @param data a Pandas dataframe with the data @param land A custom land polygon to check against. If not provided, use Natural Earth. @param report If True, errors returned instead of records @param buffer Set how far inland points are still to be deemed valid @param offline if True, a local simplified shoreline is used, otherwise an OBIS webservice is used. Default is False @return Errors or problematic records """ # Catch any troubles with the lat/lon data itself first # errors = check_lonlat(data, report) errors = [] if len(errors) > 0 and report: return errors # Make a geopandas GeoSeries of the now confirmed not-bad data. gdf = gpd.GeoDataFrame( data, geometry=gpd.points_from_xy(data.decimalLongitude, data.decimalLatitude) ) if land is not None and (not offline): warnings.warn("land parameter not supported when in offline mode") if buffer != 0 and offline: warnings.warn("buffer parameter not used when in online mode") # Offline with no specified land polygons - set up the default land polygon set if offline and land is None: land_shp_fname = shpreader.natural_earth( resolution="10m", category="physical", name="land" ) land_geom = unary_union(list(shpreader.Reader(land_shp_fname).geometries())) land = prep(land_geom) # TODO: what if we have specified a land polygon? # Offline in all cases if offline: # spatial query to find overlaps between each lat/lon in my dataset and the land polygons # try using geopandas.GeoSeries.contains() for index, row in gdf.iterrows(): gdf.loc[index, "on_land"] = land.contains(row.geometry) else: # we're in online mode - use xylookup.lookup() shoredistance = xy.lookup( data[["decimalLongitude", "decimalLatitude"]].to_numpy(), shoredistance=True, grids=True, areas=True, asdataframe=True, ) # gdf on_land needs to be a boolean series. # multiply by the buffer amount, defaults to zero so right side evals zero without a buffer value gdf["on_land"] = shoredistance["shoredistance"] < (-1 * buffer) # Are we returning a report or the offending data? if report: if len(gdf) > 0: return gdf[ gdf["on_land"] ] else: # the function returns an empty dataframe return pd.DataFrame(index=data.index, columns=data.columns) # return empty dataframe in the same shape, as per the R implementation else: # if we are not returning a report, return the offending rows themselves. return gdf[gdf["on_land"]]