electricblue

ElectricBlue is a non-profit technology transfer startup creating research-oriented solutions for the scientific community.

csv(path, encoding='utf-8')

ElectricBlue csv data format parser

Parameters:

Name        Type    Description                      Default
path        str     path to the csv file to parse    required
encoding    str     file encoding                    'utf-8'

Returns:

Type       Description
Dataset    xarray.Dataset

Source code in ocean_data_parser/parsers/electricblue.py
def csv(
    path: str,
    encoding: str = "utf-8",
) -> xarray.Dataset:
    """ElectricBlue csv data format parser

    Args:
        path (str): path to the csv file to parse
        encoding (str, optional): file encoding. Defaults to 'utf-8'.

    Returns:
        xarray.Dataset
    """
    with open(path, encoding=encoding) as f:
        line = True
        metadata = GLOBAL_ATTRIBUTES
        metadata["source_file"] = path

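        # Read the header block line by line, collecting "key, value" pairs
        # as metadata until the data column header row ("time,...") is reached.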
        while line:
            line = f.readline()
            metadata["source_file_header"] += line
            line = line.strip()

            if re.match(r"^[-,\s]+$", line):
                continue
            elif line.startswith("time,"):
                break
            else:
                items = line.split(", ", 1)
                key = items[0]
                value = items[1] if len(items) == 2 else ""

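                # Normalize the header key into a snake_case attribute name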
                attr = re.sub(r"[\s\[\]\(\)\-]+", "_", key.lower())
                attr = re.sub(r"__+", "_", attr)
                attr = re.sub(r"_$", "", attr)

                # cast value
                if re.match(r"^[+-]*\d+$", value):
                    value = int(value)
                elif re.match(r"^[+-]*\d+\.\d+$", value):
                    value = float(value)

                metadata[attr] = value

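        # The last line read holds the data column names; parse the remaining
        # rows, converting the first column to UTC timestamps using the
        # recorded time zone offset.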
        columns = line.split(",")
        time_zone = metadata.pop("time_zone")
        df = pd.read_csv(
            f,
            sep=",",
            header=None,
            names=columns,
            converters={0: lambda x: pd.to_datetime(x + time_zone, utc=True)},
        )
        if len(df) != metadata["samples"]:
            logger.warning(
                "Parsed data samples=%s do not match expected samples=%s",
                str(len(df)),
                metadata["samples"],
            )

        # Convert to xarray dataset
        ds = df.to_xarray()

        # Global attributes
        ds.attrs = metadata
        ds.attrs.update(
            {
                "instrument_type": ds.attrs.get("envlogger_version"),
                "instrument_sn": ds.attrs.get("serial_number"),
            }
        )
        ds["latitude"] = ds.attrs["lat"]
        ds["longitude"] = ds.attrs["long"]

        # Variables attributes
        for var in ds:
            if var in VARIABLE_ATTRIBUTES:
                ds[var].attrs = VARIABLE_ATTRIBUTES[var]
        ds["temp"].attrs["units"] = ds.attrs.pop("temperature")
        ds = standardize_dataset(ds)
        return ds
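
A minimal usage sketch, assuming the module import path shown above; the file name and the to_netcdf call are illustrative only, not part of the documented behaviour:

from ocean_data_parser.parsers import electricblue

# Parse an ElectricBlue csv file into an xarray.Dataset (file name is hypothetical)
ds = electricblue.csv("electricblue_export.csv")

# Header metadata is carried as global attributes
print(ds.attrs.get("instrument_type"), ds.attrs.get("instrument_sn"))

# The dataset can then be written out, for example to NetCDF
ds.to_netcdf("electricblue_export.nc")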

log_csv(path, encoding='UTF-8', rename_variables=True)

Parse ElectricBlue log csv file

Parameters:

Name                Type    Description                                                   Default
path                str     path to the csv file                                          required
encoding            str     File encoding. Defaults to "UTF-8".                           'UTF-8'
rename_variables    bool    Rename variables to valid NetCDF names. Defaults to True.    True

Returns:

Type       Description
Dataset    xarray.Dataset

Source code in ocean_data_parser/parsers/electricblue.py
def log_csv(
    path: str, encoding: str = "UTF-8", rename_variables: bool = True
) -> xarray.Dataset:
    """Parse ElectricBlue log csv file

    Args:
        path (str): path to the csv file
        encoding (str, optional): File encoding. Defaults to "UTF-8".
        rename_variables (bool, optional): Rename variables to
            valid NetCDF names. Defaults to True.

    Returns:
        xarray.Dataset
    """

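    # Read the log file with the "time" column parsed as the datetime index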
    df = pd.read_csv(path, encoding=encoding, parse_dates=True, index_col=["time"])
    ds = df.to_xarray()
    # add default attributes
    ds.attrs.update({**GLOBAL_ATTRIBUTES, "source": path})
    ds = standardize_dataset(ds)

    # Rename variables to be compatible with NetCDF
    if rename_variables:
        ds = rename_variables_to_valid_netcdf(ds)
    return ds
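
A similar sketch for the log parser; the file name is an assumption for illustration:

from ocean_data_parser.parsers import electricblue

# Parse an ElectricBlue log csv; variables are renamed to valid NetCDF names by default
ds = electricblue.log_csv("electricblue_log.csv")

# Keep the original column names instead
ds_raw = electricblue.log_csv("electricblue_log.csv", rename_variables=False)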