Skip to content

sunburst

Sunburst Sensors LLC manufactures autonomous instruments that measure the marine and freshwater inorganic carbon parameters pCO2 (partial pressure of carbon dioxide) and pH using their patented drift-free technology.

superCO2(path, output=None)

Parse a superCO2 output txt file.

Source code in ocean_data_parser/parsers/sunburst.py
def superCO2(path: str, output: str = None) -> xarray.Dataset:
    """Parse a superCO2 output txt file.

    The first line of the file must declare the size of the header as
    "<N> header lines". The last header line holds the tab separated column
    names and is parsed by pandas together with the data.

    Args:
        path (str): path to the superCO2 txt file.
        output (str, optional): when equal to "dataframe", the parsed
            pandas.DataFrame and the global attributes dictionary are
            returned instead of a dataset. Defaults to None.

    Returns:
        xarray.Dataset: result of ``standardize_dataset`` applied to the
        parsed data, or the tuple ``(dataframe, global_attributes)`` when
        ``output == "dataframe"``.

    Raises:
        ValueError: if the first line does not declare the number of header
            lines.
    """
    with open(path, encoding="utf-8") as f:
        header = [f.readline()]
        header_size = re.search(r"(\d+) header lines", header[0])
        if header_size is None:
            # Without the header size we cannot locate the column names row
            logger.error("Unknown header format")
            raise ValueError(
                f"Unknown header format, expected '<N> header lines': {header[0]!r}"
            )
        n_header_lines = int(header_size[1])

        # Read the rest of the header lines: the first line is already read
        # and the last one (column names) is left to pandas.
        header += [f.readline() for _ in range(n_header_lines - 2)]

        # Read the column header and data with pandas
        df = pd.read_csv(
            f,
            sep=r"\t",
            engine="python",
            dtype=superCO2_dtypes,
            na_values=[-999, "NaN"],
        )

    if len(header) > 3 and "Collected beginning on" in header[2]:
        collected_beginning_date = pd.to_datetime(header[3])
    else:
        collected_beginning_date = pd.NaT

    # Reformat variable names
    df.columns = [_format_variables(var) for var in df.columns]

    # Generate time variable from Date and Time columns
    df["time"] = pd.to_datetime(
        (df["Date"] + " " + df["Time"]), format="%Y%m%d %H%M%S", utc=True
    ).dt.tz_convert(None)

    # Review day of the year variable. DOY_UTC needs a reference year: use
    # the collection start date from the header and, when it is missing,
    # fall back on the earliest Date + Time record.
    # NOTE(review): the fallback assumes DOY_UTC is counted from the year of
    # the first record - confirm against the instrument documentation.
    reference_date = collected_beginning_date
    if pd.isna(reference_date):
        reference_date = df["time"].min()
    if pd.isna(reference_date):
        # No usable reference year, DOY_UTC cannot be converted to a time
        df["time_doy_utc"] = pd.Series(
            pd.NaT, index=df.index, dtype="datetime64[ns]"
        )
    else:
        df["time_doy_utc"] = pd.to_datetime(
            df["DOY_UTC"] - 1,
            unit="D",
            origin=pd.Timestamp(reference_date.year, 1, 1),
            utc=True,
        ).dt.tz_convert(None)

    # Compare DOY_UTC vs Date + Time
    time_difference = df["time"] - df["time_doy_utc"]
    dt = time_difference.mean().total_seconds()
    dt_std = time_difference.std().total_seconds()
    # Use the magnitude so an offset in either direction is reported
    if abs(dt) > MAXIMUM_TIME_DIFFERENCE_IN_SECONDS:
        logger.warning(
            "Date + Time and DOY_UTC variables have an average time difference of %ss>%ss with a standard deviation of %ss",
            dt,
            MAXIMUM_TIME_DIFFERENCE_IN_SECONDS,
            dt_std,
        )

    global_attributes = {
        # Strip the line ending kept by readline()
        "title": header[1].replace("\n", "") if len(header) > 1 else "",
        "collected_beginning_date": collected_beginning_date,
    }

    if output == "dataframe":
        return df, global_attributes

    # Convert to an xarray dataset
    ds = df.to_xarray()
    ds.attrs = global_attributes

    return standardize_dataset(ds)

superCO2_notes(path)

Parse superCO2 notes files and return an xarray Dataset

Parameters:

Name Type Description Default
path str

file path

required

Returns:

Type Description
Dataset

xarray.Dataset: Parsed dataset

Source code in ocean_data_parser/parsers/sunburst.py
def superCO2_notes(path: str) -> xarray.Dataset:
    """Parse superCO2 notes files and return an xarray Dataset

    A note is detected by a row starting with a "YYYY/MM/DD HH:MM:SS"
    timestamp followed by a decimal day of year. The three rows that follow
    are read as the note type, the whitespace separated column names and the
    matching whitespace separated values. Any other row is ignored.

    Args:
        path (str): file path

    Returns:
        xarray.Dataset: Parsed dataset
    """
    time_row = re.compile(
        r"(?P<time>\d\d\d\d\/\d\d\/\d\d \d\d\:\d\d\:\d\d)\s+(?P<day_of_year>\d+\.\d*)"
    )
    notes = []
    with open(path, "r", encoding="utf-8") as f:
        while True:
            line = f.readline()
            if not line:
                # readline() only returns an empty string at the end of file
                break

            time_match = time_row.match(line)
            if time_match is None:
                continue

            # Parse time row
            note_ensemble = time_match.groupdict()
            # type row
            note_ensemble["note_type"] = f.readline().replace("\n", "")
            # columns and data; kept in their own variables so an empty data
            # row is not mistaken for the end of the file
            columns = re.split(r"\s+", f.readline().replace("\n", ""))
            data = re.split(r"\s+", f.readline().replace("\n", ""))

            # Combine notes to previously parsed ones
            notes.append({**note_ensemble, **dict(zip(columns, data))})

    # Convert notes to a dataframe
    df = pd.DataFrame.from_dict(notes)
    df["time"] = pd.to_datetime(df["time"])
    df = df.astype(dtype=notes_dtype_mapping, errors="ignore")

    ds = df.to_xarray()
    ds.attrs = {"Convention": "CF-1.6"}
    return ds