onset

Onset is a company that manufactures data loggers and sensors for environmental monitoring. Their HOBO data loggers are widely used for monitoring water quality parameters such as temperature, conductivity, and light intensity. This module provides parsers for the data formats generated by the HOBOware and HOBOconnect software.

csv(path, convert_units_to_si=True, standardize_variable_names=True, encoding='UTF-8', errors='strict', timezone=None, ambiguous_timestamps='raise')

Parses the Onset CSV format generated by HOBOware into an xarray object

Parameters:

path (str, required): The path to the CSV file
convert_units_to_si (bool, default True): Whether to standardize data units to SI units
standardize_variable_names (bool, default True): Rename variables to a standardized naming convention
encoding (str, default 'UTF-8'): File encoding
errors (str, default 'strict'): Error handling
timezone (str, default None): Timezone used to localize the time variable; overrides the timezone in the file header
ambiguous_timestamps (str, default 'raise'): How to handle ambiguous timestamps

Returns: xarray.Dataset
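
A minimal usage sketch (the file name below is hypothetical; the timezone and ambiguous_timestamps values simply override the defaults listed above):

from ocean_data_parser.parsers import onset

# Parse a HOBOware CSV export; units are converted to SI and variable
# names are standardized by default.
ds = onset.csv(
    "hobo_export.csv",             # hypothetical file name
    timezone="UTC",                # override the timezone found in the header
    ambiguous_timestamps="infer",  # let pandas resolve fall-back duplicates
)
print(ds)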

Source code in ocean_data_parser/parsers/onset.py
def csv(
    path: str,
    convert_units_to_si: bool = True,
    standardize_variable_names: bool = True,
    encoding: str = "UTF-8",
    errors: str = "strict",
    timezone: str = None,
    ambiguous_timestamps: str = "raise",
) -> xarray.Dataset:
    """Parses the Onset CSV format generate by HOBOware into a xarray object

    Args:
        path: The path to the CSV file
        convert_units_to_si: Whether to standardize data units to SI units
        standardize_variable_names: Rename the variable names a standardize name
            convention
        encoding: File encoding. Defaults to "utf-8"
        errors: Error handling. Defaults to "strict"
        timezone: Timezone to localize the time variable, overwrites the timezone in header
        ambiguous_timestamps: How to handle ambiguous time stamps. Defaults to "raise"
    Returns:
        xarray.Dataset
    """

    raw_header = []
    line = ""
    with open(
        path,
        encoding=encoding,
        errors=errors,
    ) as f:
        while "Date Time" not in line and len(raw_header) < 10:
            line = f.readline()
            raw_header.append(line)
        first_row = f.readline()
    if "Date Time" not in raw_header[-1]:
        raise ValueError("Date Time column not found in header")

    # Parse onset header
    header, variables = _parse_onset_csv_header(raw_header)
    date_column_index = list(variables.keys()).index("Date Time")
    date_format = _get_time_format(first_row.split(",")[date_column_index])

    # Inputs to pd.read_csv
    consider_columns = {
        var: id
        for id, var in enumerate(variables.keys())
        if var.lower().replace(" ", "_") not in IGNORED_VARIABLES
    }
    df = pd.read_csv(
        path,
        na_values=[" "],
        skiprows=list(range(len(raw_header))),
        parse_dates=["Date Time"],
        date_format=date_format,
        sep=",",
        header=None,
        memory_map=True,
        names=consider_columns.keys(),
        usecols=consider_columns.values(),
        encoding_errors=errors,
        encoding=encoding,
    )

    # Add timezone to time variables
    if df["Date Time"].dtype == "object":
        logger.warning(
            "Date Time column is not in a consistent format. Trying to convert"
        )
        df["Date Time"] = df["Date Time"].apply(
            lambda x: pd.to_datetime(x, format=_get_time_format(x))
        )
    df["Date Time"] = df["Date Time"].dt.tz_localize(
        timezone or header["timezone"], ambiguous=ambiguous_timestamps
    )
    check_daylight_saving(df["Date Time"], ambiguous_timestamps)

    # Convert to dataset
    ds = df.to_xarray()
    ds.attrs = {**GLOBAL_ATTRIBUTES, **header}
    for var in ds:
        ds[var].attrs = variables[var]

    if standardize_variable_names:
        ds = ds.rename_vars(_standardized_variable_mapping(ds))
        # Detect instrument type based on variables available
        ds.attrs["instrument_type"] = _detect_instrument_type(ds)

    # Review units and convert to the SI system
    if convert_units_to_si:
        if standardize_variable_names:
            if "temperature" in ds and ("C" not in ds["temperature"].attrs["units"]):
                logger.warning("Temperature in Fahrenheit will be converted to Celsius")
                # Keep the original units for the history record before overwriting them
                original_units = ds["temperature"].attrs["units"]
                ds["temperature"] = _farenheit_to_celsius(ds["temperature"])
                ds["temperature"].attrs["units"] = "degC"
                ds.attrs["history"] += " ".join(
                    [
                        f"{datetime.now()}",
                        f"Convert temperature ({original_units}) to "
                        "degree Celsius [(degF-32)/1.8000]",
                    ]
                )
            if (
                "conductivity" in ds
                and "uS/cm" not in ds["conductivity"].attrs["units"]
            ):
                logger.warning(
                    "Unknown conductivity units (%s)", ds["conductivity"].attrs["units"]
                )
        else:
            logger.warning(
                "Unit conversion is not supported if standardize_variable_names=False"
            )

    ds = standardize_dataset(ds)
    return ds
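
The ambiguous_timestamps argument is passed to pandas' tz_localize, which is what raises when a local time occurs twice during the daylight-saving fall-back. A standalone sketch of the accepted values, independent of this parser:

import pandas as pd

# During the 2022 North American fall-back, 01:30 local time occurs twice.
times = pd.to_datetime(["2022-11-06 01:30:00", "2022-11-06 01:30:00"])

# ambiguous="raise" (the csv() default) fails on such duplicates:
try:
    times.tz_localize("US/Eastern", ambiguous="raise")
except Exception as error:
    print(error)

# ambiguous="infer" asks pandas to resolve the order from the series itself,
# while ambiguous="NaT" marks the undecidable values as missing:
print(times.tz_localize("US/Eastern", ambiguous="NaT"))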

xlsx(path, timezone=None, ambiguous_timestamps='infer')

Parses the Onset XLSX format generated by HOBOware into an xarray object

Parameters:

path (str, required): The path to the XLSX file
timezone (str, default None): Timezone used to localize the time variable; overrides the timezone in the file header
ambiguous_timestamps (str, default 'infer'): How to handle ambiguous timestamps

Returns: xarray.Dataset
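
A minimal usage sketch (the file name below is hypothetical):

from ocean_data_parser.parsers import onset

# Parse an XLSX export; the timezone is taken from the "Date Time" column
# header unless overridden here.
ds = onset.xlsx("hobo_export.xlsx", timezone="UTC")
print(ds)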

Source code in ocean_data_parser/parsers/onset.py
def xlsx(
    path: str, timezone: str = None, ambiguous_timestamps: str = "infer"
) -> xarray.Dataset:
    """Parses the Onset XLSX format generate by HOBOware into a xarray object

    Args:
        path: The path to the XLSX file
        timezone: Timezone to localize the time variable, overwrites the timezone in header
        ambiguous_timestamps: How to handle ambiguous time stamps. Defaults to "infer"
    Returns:
        xarray.Dataset
    """

    def _format_detail_key(key):
        """Format detail key to be more readable"""
        key = re.sub(r"\(.*\)", "", key)
        return (
            key.replace(" Info", "")
            .replace(" ", "_")
            .replace("-", "_")
            .lower()
            .replace("deployment_deployment", "deployment")
            .replace("device_device", "device")
            .replace("app_app", "app")
        )

    def _get_column_and_unit(column):
        """split column name and unit in parenthesis"""
        column = column.split(" (")
        if len(column) == 1:
            return column[0], None
        return column[0], column[1].replace(")", "")

    # Read the different sheets from the xlsx file
    data = pd.read_excel(path, sheet_name="Data", engine="openpyxl")
    events = pd.read_excel(path, sheet_name="Events", engine="openpyxl")
    details = (
        pd.read_excel(
            path,
            sheet_name="Details",
            engine="openpyxl",
            names=["group", "subgroup", "parameter", "value"],
        )
        .ffill(axis=0)
        .dropna(subset=["parameter", "value"])
    )
    details_attrs = {
        _format_detail_key(f"{row['subgroup']}_{row['parameter']}"): row["value"]
        for id, row in details.iterrows()
        if row["group"] == "Devices"
    }

    variable_attributes = {}

    for var in data.columns:
        column, unit = _get_column_and_unit(var)
        column = _format_detail_key(column)
        if column == "#":
            column = "record_number"
        elif column == "date_time":
            column = "time"
        variable_attributes[column] = {
            "long_name": column,
            "units": unit,
            "original_name": var,
        }
    data.columns = variable_attributes.keys()

    if "time" not in data.columns:
        raise ValueError("Date Time column not found in header")
    file_timezone = variable_attributes["time"].pop("units", None)
    if file_timezone:
        file_timezone = TIMEZONE_MAPPING.get(file_timezone, file_timezone)

    # Convert to dataset
    data["time"] = (
        pd.to_datetime(data["time"], errors="coerce")
        .dt.tz_localize(timezone or file_timezone, ambiguous=ambiguous_timestamps)
        .dt.tz_convert("UTC")
    )
    check_daylight_saving(data["time"])

    ds = data.to_xarray()
    for var in variable_attributes:
        ds[var].attrs = variable_attributes[var]
    ds.attrs = {**GLOBAL_ATTRIBUTES, "events": events.to_json(), **details_attrs}
    ds["instrument_type"] = _detect_instrument_type(ds)
    ds = standardize_dataset(ds)
    return ds
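
Since the Events sheet is serialized into the dataset attributes with DataFrame.to_json(), it can be recovered after parsing; a sketch, assuming ds was returned by xlsx() and the events attribute is left untouched by standardize_dataset:

import io

import pandas as pd

# ds.attrs["events"] holds the "Events" sheet as a JSON string
events = pd.read_json(io.StringIO(ds.attrs["events"]))
print(events.head())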