
sunburst

Sunburst Sensors LLC.

Sunburst manufactures autonomous instruments that measure the marine and freshwater inorganic carbon parameters pCO2 (partial pressure of carbon dioxide) and pH using its patented drift-free technology.

superCO2(path, output=None)

Deprecated name for super_co2 function.

Source code in ocean_data_parser/parsers/sunburst.py
def superCO2(path: str, output: str = None) -> xarray.Dataset:  # noqa
    """Deprecated name for super_co2 function."""
    logger.warning("Function name superCO2 is deprecated, use super_co2 instead.")
    return super_co2(path, output)
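
Calling the deprecated name still works, but it only logs a warning and forwards to super_co2, so new code should call super_co2 directly. A minimal sketch (the file path is hypothetical):

from ocean_data_parser.parsers import sunburst

ds = sunburst.superCO2("superCO2_output.txt")   # logs a deprecation warning
ds = sunburst.super_co2("superCO2_output.txt")  # preferred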

superCO2_notes(path)

Deprecated name for super_co2_notes function.

Source code in ocean_data_parser/parsers/sunburst.py
def superCO2_notes(path: str) -> xarray.Dataset:  # noqa
    """Deprecated name for super_co2 function."""
    logger.warning("Function name superCO2 is deprecated, use super_co2 instead.")
    return super_co2_notes(path)

super_co2(path, output=None)

Parse a superCO2 output txt file.

Source code in ocean_data_parser/parsers/sunburst.py
def super_co2(path: str, output: str = None) -> xarray.Dataset:
    """Parse superCO2 output file txt file."""
    header = []
    line = 1
    with open(path, encoding="utf-8") as f:
        header += [f.readline()]
        if re.search(r"\d+ header lines", header[0]):
            n_header_lines = int(re.search(r"(\d+) header lines", header[0])[1])
        else:
            logger.error("Unknown header format")
            # Fail fast: n_header_lines would otherwise be undefined below
            raise ValueError("Unknown header format")

        # Read the rest of the header lines
        while line < n_header_lines - 1:
            header.append(f.readline())
            line += 1

        # Read the column header and data with pandas
        df = pd.read_csv(
            f,
            sep=r"\t",
            engine="python",
            dtype=super_co2_dtypes,
            na_values=[-999, "NaN"],
        )
    if "Collected beginning on" in header[2]:
        collected_beginning_date = pd.to_datetime(header[3])
    else:
        collected_beginning_date = pd.NaT
    # Reformat variable names
    df.columns = [_format_variables(var) for var in df.columns]

    # Generate time variable from Date and Time columns
    df["time"] = pd.to_datetime(
        (df["Date"] + " " + df["Time"]), format="%Y%m%d %H%M%S", utc=True
    ).dt.tz_convert(None)
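    # Illustrative example (hypothetical values): Date "20210305" and Time "134502"
    # combine to the naive UTC timestamp 2021-03-05 13:45:02.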

    # Review day of the year variable
    df["time_doy_utc"] = pd.to_datetime(
        df["DOY_UTC"] - 1,
        unit="D",
        origin=pd.Timestamp(collected_beginning_date.year, 1, 1),
        utc=True,
    ).dt.tz_convert(None)
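    # Illustrative example (hypothetical values): DOY_UTC is a 1-based fractional day
    # of year, so DOY_UTC = 64.5625 in 2021 maps to 2021-03-05 13:30:00 UTC.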

    # Compare DOY_UTC vs Date + Time
    dt = (df["time"] - df["time_doy_utc"]).mean().total_seconds()
    dt_std = (df["time"] - df["time_doy_utc"]).std().total_seconds()
    if dt > MAXIMUM_TIME_DIFFERENCE_IN_SECONDS:
        logger.warning(
            "Date + Time and DOY_UTC variables have an average time difference of %ss>%ss with a standard deviation of %ss",
            dt,
            MAXIMUM_TIME_DIFFERENCE_IN_SECONDS,
            dt_std,
        )

    global_attributes = {
        "title": header[1].replace(r"\n", ""),
        "collected_beginning_date": collected_beginning_date,
    }

    if output == "dataframe":
        return df, global_attributes

    # Convert to an xarray dataset
    ds = df.to_xarray()
    ds.attrs = global_attributes

    return standardize_dataset(ds)
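
A short usage sketch (the file name is hypothetical). By default the parser returns a standardized xarray Dataset; passing output="dataframe" instead returns the raw pandas DataFrame together with the parsed global attributes:

from ocean_data_parser.parsers.sunburst import super_co2

ds = super_co2("superCO2_output.txt")
df, attrs = super_co2("superCO2_output.txt", output="dataframe")
print(attrs["collected_beginning_date"])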

super_co2_notes(path)

Parse superCO2 notes files and return an xarray Dataset.

Parameters:

Name   Type   Description   Default
path   str    file path     required

Returns:

Type             Description
xarray.Dataset   Parsed dataset

Source code in ocean_data_parser/parsers/sunburst.py
def super_co2_notes(path: str) -> xarray.Dataset:
    """Parse superCO2 notes files and return an xarray Dataset.

    Args:
        path (str): file path

    Returns:
        xarray.Dataset: Parsed dataset
    """
    """Parse superCO2 notes files and return an xarray Dataset"""
    line = True
    notes = []
    with open(path, encoding="utf-8") as f:
        while line:
            line = f.readline()
            if line == "":
                continue
            elif re.match(r"\d\d\d\d\/\d\d\/\d\d \d\d\:\d\d\:\d\d\s+\d+\.\d*", line):
                # Parse time row
                note_ensemble = re.match(
                    r"(?P<time>\d\d\d\d\/\d\d\/\d\d \d\d\:\d\d\:\d\d)\s+(?P<day_of_year>\d+\.\d*)",
                    line,
                ).groupdict()
                # type row
                note_ensemble["note_type"] = f.readline().replace("\n", "")
                # columns and data
                header = f.readline().replace("\n", "")
                columns = re.split(r"\s+", header)
                line = f.readline().replace("\n", "")
                data = re.split(r"\s+", line)

                # Combine notes to previously parsed ones
                notes += [{**note_ensemble, **dict(zip(columns, data))}]
    # Convert notes to a dataframe
    df = pd.DataFrame.from_dict(notes)
    df["time"] = pd.to_datetime(df["time"])
    df = df.astype(dtype=notes_dtype_mapping, errors="ignore")

    ds = df.to_xarray()
    ds.attrs = {"Convention": "CF-1.6"}
    return ds
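
A short usage sketch (the file name is hypothetical); each note entry in the file becomes one record in the returned dataset:

from ocean_data_parser.parsers.sunburst import super_co2_notes

notes = super_co2_notes("superCO2_notes.txt")
print(notes["time"].values, notes["note_type"].values)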