Skip to content

sunburst

Sunburst

http://www.sunburstsensors.com/

superCO2(path, output=None)

Parse a superCO2 output text file.

Source code in ocean_data_parser/parsers/sunburst.py
def superCO2(path: str, output: str = None) -> xarray.Dataset:
    """Parse a superCO2 output text file.

    The first line of the file must state the number of header lines
    (``"<n> header lines"``). The following header lines are kept as
    metadata and the tab-separated table after them is read with pandas.

    Args:
        path (str): path to the superCO2 text file.
        output (str, optional): when set to ``"dataframe"``, return the
            parsed dataframe and the global attributes instead of a dataset.

    Returns:
        xarray.Dataset: the parsed data passed through
        ``standardize_dataset``, or a ``(pandas.DataFrame, dict)`` tuple
        when ``output == "dataframe"``.

    Raises:
        ValueError: if the first line does not give the number of header
            lines.
    """
    with open(path, encoding="utf-8") as f:
        header = [f.readline()]
        header_size = re.search(r"(\d+) header lines", header[0])
        if header_size is None:
            # Without the header size the table cannot be located: fail
            # explicitly instead of hitting an unbound variable below.
            logger.error("Unknown header format")
            raise ValueError(
                f"Unknown header format in {path}: "
                "first line must contain '<n> header lines'"
            )
        n_header_lines = int(header_size[1])

        # Read the rest of the header lines. One line is left unread on
        # purpose: pandas consumes it as the column header.
        header += [f.readline() for _ in range(n_header_lines - 2)]

        # Read the column header and data with pandas
        df = pd.read_csv(
            f,
            sep=r"\t",
            engine="python",
            dtype=superCO2_dtypes,
            na_values=[-999, "NaN"],
        )

    if "Collected beginning on" in header[2]:
        collected_beginning_date = pd.to_datetime(header[3])
    else:
        collected_beginning_date = pd.NaT

    # Reformat variable names
    df.columns = [_format_variables(var) for var in df.columns]

    # Generate time variable from Date and Time columns
    df["time"] = (
        pd.to_datetime(
            (df["Date"] + " " + df["Time"]), format="%Y%m%d %H%M%S", utc=True
        )
        .dt.tz_convert(None)
        .dt.to_pydatetime()
    )

    # DOY_UTC carries no year. Take it from the header date and, when the
    # header has none (NaT), fall back on the earliest Date + Time record so
    # the origin below can still be built.
    reference_date = collected_beginning_date
    if pd.isna(reference_date):
        reference_date = df["time"].min()

    # Review day of the year variable
    df["time_doy_utc"] = (
        pd.to_datetime(
            df["DOY_UTC"] - 1,
            unit="D",
            origin=pd.Timestamp(reference_date.year, 1, 1),
            utc=True,
        )
        .dt.tz_convert(None)
        .dt.to_pydatetime()
    )

    # Compare DOY_UTC vs Date + Time. The offset can have either sign, so the
    # threshold is applied to its magnitude.
    time_offset = df["time"] - df["time_doy_utc"]
    dt = abs(time_offset.mean().total_seconds())
    dt_std = time_offset.std().total_seconds()
    if dt > MAXIMUM_TIME_DIFFERENCE_IN_SECONDS:
        logger.warning(
            "Date + Time and DOY_UTC variables have an average time difference of %ss>%ss with a standard deviation of %ss",
            dt,
            MAXIMUM_TIME_DIFFERENCE_IN_SECONDS,
            dt_std,
        )

    global_attributes = {
        # Strip the actual newline character left by readline()
        "title": header[1].replace("\n", ""),
        "collected_beginning_date": collected_beginning_date,
    }

    if output == "dataframe":
        return df, global_attributes

    # Convert to an xarray dataset
    ds = df.to_xarray()
    ds.attrs = global_attributes

    return standardize_dataset(ds)

superCO2_notes(path)

Parse superCO2 notes files and return an xarray Dataset

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| path | str | file path | required |

Returns:

| Type | Description |
| --- | --- |
| xarray.Dataset | Parsed dataset |

Source code in ocean_data_parser/parsers/sunburst.py
def superCO2_notes(path: str) -> xarray.Dataset:
    """Parse superCO2 notes files and return an xarray Dataset

    Each note is expected to span four consecutive lines: a row starting
    with a ``YYYY/MM/DD HH:MM:SS`` timestamp followed by a decimal day of
    year, a note type row, a column names row and a data row. Lines outside
    of such an ensemble are ignored.

    Args:
        path (str): file path

    Returns:
        xarray.Dataset: Parsed dataset
    """
    note_start = re.compile(
        r"(?P<time>\d\d\d\d\/\d\d\/\d\d \d\d\:\d\d\:\d\d)\s+(?P<day_of_year>\d+\.\d*)"
    )
    notes = []
    with open(path, "r", encoding="utf-8") as f:
        rows = iter(f)
        # Iterate on the raw file lines so that an empty data row inside a
        # note cannot be mistaken for the end of the file.
        for row in rows:
            matched = note_start.match(row)
            if matched is None:
                continue

            # Parse time row
            note_ensemble = matched.groupdict()
            # type row
            note_ensemble["note_type"] = next(rows, "").replace("\n", "")
            # columns and data
            columns = re.split(r"\s+", next(rows, "").replace("\n", ""))
            data = re.split(r"\s+", next(rows, "").replace("\n", ""))

            # Combine notes to previously parsed ones
            notes.append({**note_ensemble, **dict(zip(columns, data))})

    # Convert notes to a dataframe
    df = pd.DataFrame(notes)
    df["time"] = pd.to_datetime(df["time"]).dt.to_pydatetime()
    df = df.astype(dtype=notes_dtype_mapping, errors="ignore")

    ds = df.to_xarray()
    # NOTE(review): the CF conventions name this global attribute
    # "Conventions" (plural) - confirm whether "Convention" is intended.
    ds.attrs = {"Convention": "CF-1.6"}
    return ds