Skip to content

amundsen

This module contains the parser for the Amundsen INT format. This format is used to store oceanographic data generated by the Amundsen Science and ArcticNet programs.

int_format(path, encoding='Windows-1252', map_to_vocabulary=True, generate_depth=True)

Parse Amundsen INT format.

Parameters:

Name Type Description Default
path str

file path to parse.

required
encoding str

File encoding. Defaults to "Windows-1252".

'Windows-1252'
map_to_vocabulary bool

Rename variables to vocabulary. Defaults to True.

True
generate_depth bool

Generate depth variable. Defaults to True.

True

Returns:

Type Description
Dataset

xr.Dataset

Source code in ocean_data_parser/parsers/amundsen.py
def int_format(
    path: str,
    encoding: str = "Windows-1252",
    map_to_vocabulary: bool = True,
    generate_depth: bool = True,
) -> xr.Dataset:
    """Parse Amundsen INT format.

    The file is read in three steps: the ``%``-prefixed header is parsed into
    global attributes (and per-variable ``long_name``/``units``), the column
    names are sliced out of the column-name line using the dashed delimiter
    line below it, and the remaining lines are read as whitespace-separated
    data.

    Args:
        path (str): file path to parse.
        encoding (str, optional): File encoding. Defaults to "Windows-1252".
        map_to_vocabulary (bool, optional): Rename variables to vocabulary. Defaults to True.
        generate_depth (bool, optional): Generate depth variable. Defaults to True.

    Returns:
        xr.Dataset: the parsed dataset. Files whose name ends with
        ``_info.int`` are ignored and ``None`` is returned for them.
    """
    nc_logger, nc_handler = get_history_handler()
    logger.addHandler(nc_handler)

    logger.info(
        "Convert INT file format with python package ocean_data_parser.amundsen.int_format V%s",
        __version__,
    )
    metadata = default_global_attributes.copy()

    # Ignore info.int files
    if path.endswith("_info.int"):
        logger.warning("Ignore *_info.int files: %s", path)
        return

    logger.debug("Read %s", path)
    with open(path, encoding=encoding) as file:
        # Parse header
        for line in file:
            line = line.replace("\n", "")
            if re.match(r"^%\s*$", line) or not line:
                # Empty line or a lone "%" separator
                continue
            elif line and not re.match(r"\s*%", line) and line[0] == " ":
                # First non-comment line starting with a space: this is the
                # column-name line, which ends the header.
                last_line = line
                break
            elif ":" in line:
                # "% key: value" header entry; drop the leading "%"
                key, value = line.strip()[1:].split(":", 1)
                metadata[key.strip()] = value.strip()
            # The following variable descriptions have no "key:" prefix in the
            # header and are matched literally to their column name.
            elif line == "% Fluorescence [ug/L]":
                metadata["Fluo"] = "Fluorescence [ug/L]"
            elif line == "% Conservative Temperature (TEOS-10) [deg C]":
                metadata["CONT"] = "Conservative Temperature (TEOS-10) [deg C]"
            elif line == "% In situ density TEOS10 ((s, t, p) - 1000) [kg/m^3]":
                metadata["D_CT"] = "In situ density TEOS10 ((s, t, p) - 1000) [kg/m^3]"
            elif line == "% Potential density TEOS10 ((s, t, 0) - 1000) [kg/m^3]":
                metadata["D0CT"] = (
                    "Potential density TEOS10 ((s, t, 0) - 1000) [kg/m^3]"
                )
            elif line == "% Potential density TEOS10 (s, t, 0) [kg/m^3]":
                metadata["D0CT"] = "Potential density TEOS10 (s, t, 0) [kg/m^3]"
            elif re.match(r"% .* \[.+\]", line):
                logger.warning(
                    "Unknown variable name will be saved to unknown_variables_information: %s",
                    line,
                )
                metadata["unknown_variables_information"] += line + "\n"

            else:
                logger.warning("Unknown line format: %s", line)

        # Review metadata
        if metadata == default_global_attributes:
            logger.warning("No metadata was captured in the header of the INT file.")

        # Parse the column header: the line that ended the header loop holds
        # the column names, the next line is the dashed delimiter.
        column_name_line = last_line
        delimiter_line = file.readline()
        if not re.match(r"^[\s\-]+$", delimiter_line):
            logger.error("Delimiter line below the column names isn't the expected one")

        # Parse column names based on delimiter line below: each run of
        # "spaces + dashes" gives the width of one column.
        delimited_segments = re.findall(r"\s*\-+", delimiter_line)
        start_segment = 0
        column_names = []
        for segment in delimited_segments:
            column_names += [
                column_name_line[start_segment : start_segment + len(segment)].strip()
            ]
            start_segment = start_segment + len(segment)

        # Parse data
        df = pd.read_csv(
            file,
            sep=r"\s+",
            names=column_names,
        )

        # Sort column attributes: header entries whose key matches a column
        # name (with its first letter upper-cased) are moved out of the global
        # metadata and split into long_name / units.
        variables = {
            column: re.search(
                r"(?P<long_name>[^\[]+)(\[(?P<units>.*)\]){0,1}",
                metadata.pop(column[0].upper() + column[1:]),
            ).groupdict()
            if column[0].upper() + column[1:] in metadata
            else {}
            for column in df
        }
        if "Date" in df and "Hour" in df:
            # Hour values ending in ":60" are rewritten to ":00" before
            # parsing, and the 60 seconds are added back afterwards.
            is_60 = df["Hour"].str.contains(":60$")
            df.loc[is_60, "Hour"] = df.loc[is_60, "Hour"].str.replace(
                ":60$", ":00", regex=True
            )
            df["time"] = pd.to_datetime(df["Date"] + "T" + df["Hour"], utc=True)
            df.loc[is_60, "time"] += pd.Timedelta(seconds=60)

        # Convert to xarray object
        ds = df.to_xarray()

        # Standardize global attributes
        metadata = {
            _standardize_attribute_name(name): _standardize_attribute_value(
                value, name=name
            )
            for name, value in metadata.items()
        }
        ds.attrs = metadata

        # Generate instrument_depth variable
        pressure = [var for var in ds if var in ("Pres", "PRES")]
        if (
            generate_depth
            and pressure
            and ("Lat" in ds or "initial_latitude_deg" in ds.attrs)
        ):
            # Use the "Lat" variable when available (the same name that is
            # tested above), otherwise fall back on the header latitude.
            latitude = ds["Lat"] if "Lat" in ds else ds.attrs["initial_latitude_deg"]
            logger.info(
                "Generate instrument_depth from TEOS-10: -1 * gsw.z_from_p(ds['Pres'], %s)",
                "ds['Lat']" if "Lat" in ds else "ds.attrs['initial_latitude_deg']",
            )
            ds["instrument_depth"] = -z_from_p(ds[pressure[0]], latitude)

        # Map variables to vocabulary
        variables_to_rename = {}
        for var in ds:
            if var not in variables:
                continue

            ds[var].attrs = variables[var]
            if "long_name" in ds[var].attrs:
                ds[var].attrs["long_name"] = ds[var].attrs["long_name"].strip()

            # Include variable attributes from the vocabulary
            if not map_to_vocabulary:
                continue
            elif var not in amundsen_variable_attributes:
                logger.warning("No vocabulary is available for variable '%s'", var)
                continue

            # Match vocabulary: take the first item whose units agree
            var_units = ds[var].attrs.get("units")
            for item in amundsen_variable_attributes[var]:
                accepted_units = item.get("accepted_units")
                if (
                    var_units is None  # Consider first if no units
                    or var_units == item.get("units")
                    or (accepted_units and re.fullmatch(accepted_units, var_units))
                ):
                    if "rename" in item:
                        variables_to_rename[var] = item["rename"]

                    ds[var].attrs = {
                        key: value
                        for key, value in item.items()
                        if key not in ["accepted_units", "rename"]
                    }
                    break
            else:
                # Loop finished without a break: no vocabulary item matched
                logger.warning(
                    "No Vocabulary available for %s: %s", var, str(ds[var].attrs)
                )

        # Review rename variables
        already_existing_variables = {
            var: rename for var, rename in variables_to_rename.items() if rename in ds
        }
        if already_existing_variables:
            logger.error(
                "Can't rename variable %s since it already exist",
                already_existing_variables,
            )

        if variables_to_rename:
            logger.info("Rename variables: %s", variables_to_rename)
            ds = ds.rename(variables_to_rename)

        # Generate history from the messages captured by the history handler
        ds.attrs["history"] += nc_logger.getvalue()

        # Standardize dataset to be compatible with ERDDAP and NetCDF Classic
        ds = standardize_dataset(ds)
        return ds

Vocabulary

Amundsen Name long_name units standard_name accepted_units rename comments source
Net Net Number
Pres_open Net Open Pressure db
Pres_close Net Close Pressure db
Duration Sample Duration s
Volume Sample Volume m^3
Tem_mean Mean Temperature (ITS-90) degC sea_water_temperature degC degrees C
Tem_std Standard Deviation Temperature (ITS-90) degC sea_water_temperature degC degrees C
Sal_mean Mean Salinity (PSS-78) 1e-3 sea_water_practical_salinity PSU psu 1e-3
Sal_std Standard Deviation Salinity (PSS-78) 1e-3 sea_water_practical_salinity PSU psu 1e-3
Pres Sea Pressure (Sea Surface - 0) decibars sea_water_pressure_due_to_sea_water
Temp Temperature (ITS-90) degC sea_water_temperature degC deg C degrees C
Sal Practical Salinity (PSS-78) 1e-3 sea_water_practical_salinity PSU psu 1e-3
Date Acquisition Date yyyymmdd
Date Acquisition Date yyyy/mm/dd
Hour Acquisition Hour HHMMSS
Hour Acquisition Hour HH:MM:SS
Lat Latitude degrees N latitude
Long Longitude degrees E longitude
Wind dir True Wind Direction degrees N wind_from_direction
Wind speed True Wind Speed Knt wind_speed
Air temp Air Temperature degC air_temperature degC deg C
Dew point Dew Point degC dew_point_temperature degC deg C
Pressure Atmospheric Pressure hPa air_pressure
Humidity Air Humidity %
SV Sound Velocity m/s speed_of_sound_in_sea_water
Vel Sound Velocity From Velocity Sensor m/s speed_of_sound_in_sea_water
Trans Light Transmission %
Fluo Fluorescence ug/L mass_concentration_of_chlorophyll_in_sea_water ug/L mg/m\^3
O2 Dissolved Oxygen ml/l volume_fraction_of_oxygen_in_sea_water
O2 Dissolved Oxygen µM mole_concentration_of_dissolved_molecular_oxygen_in_sea_water O2M
ASAL Absolute Salinity (TEOS-10) g/kg sea_water_absolute_salinity
Asal Absolute Salinity TEOS-10 g/kg sea_water_absolute_salinity ASAL
CONT Conservative Temperature (TEOS-10) degC sea_water_conservative_temperature degC deg C
Cont Conservative Temperature TEOS-10 degC sea_water_conservative_temperature degC deg C CONT
D_CT In Situ Density TEOS-10 ((S, T, P) - 1000) kg/m^3 sea_water_sigma_t
D_ct Density TEOS-10 ((S, T, P) - 1000) kg/m^3 sea_water_sigma_t D_CT
Sigt Sigma-T (Rho(S, T, 0)-1000) kg/m^3 sea_water_sigma_t sigt
sigt Density TEOS-10 ((S, T, P) - 1000) kg/m^3 sea_water_sigma_t
D0CT Potential Density TEOS-10 ((S, T, 0) - 1000) kg/m^3 sea_water_sigma_theta
D0ct Density TEOS-10 (S, T, 0) kg/m^3 sea_water_sigma_theta D0CT
sigthe Sigma-Theta (Rho(S, Theta, 0)-1000) kg/m^3 sea_water_sigma_theta
Sigthe Sigma-Theta (Rho(S, Theta, 0)-1000) kg/m^3 sea_water_sigma_theta sigthe
Tem-TSG Temperature From Thermo-Salinometer degrees_celcius sea_surface_temperature ¡c
Sal-TSG Salinity From Thermo-Salinometer 1e-3 sea_surface_practical_salinity PSU psu 1e-3
Speed Vessel Speed Knt platform_speed
Speed Vessel True Speed Knt platform_speed
Tem-Water Temperature From Sensor SBE38 degrees_celcius sea_water_temperature °c °c ¡c
Flow Water Flow Rate L/min
Dens Sigma-T (Rho (S, T, 0) - 1000) kg/m^3 sea_water_sigma_t
N2 Brunt-Vaisala Frequency s^-2 square_of_brunt_vaisala_frequency_in_sea_water
Theta Potential Temperature degC sea_water_potential_temperature degC deg C theta
theta Potential Temperature degC sea_water_potential_temperature
CDOM Fluorescence Of CDOM (Colored Dissolved Organic Matter) mg/m^3 concentration_of_colored_dissolved_organic_matter_in_sea_water_expressed_as_equivalent_mass_fraction_of_quinine_sulfate_dihydrate
Cdom Fluorescence Of CDOM (Colored Dissolved Organic Matter) mg/m^3 concentration_of_colored_dissolved_organic_matter_in_sea_water_expressed_as_equivalent_mass_fraction_of_quinine_sulfate_dihydrate CDOM
NO3 Nitrate (NO3-N) Content mmol/m^3 mole_concentration_of_nitrate_in_sea_water
Par Photosynthetic Active Radiation uEinsteins/s/m^2 downwelling_photosynthetic_photon_flux_in_sea_water µeinsteins/s/m\^2 µeinsteins/s/m\^2
SPar Surface Photosynthetic Active Radiation uEinsteins/s/m^2 surface_downwelling_photosynthetic_photon_flux_in_sea_water µeinsteins/s/m\^2 µeinsteins/s/m\^2
svan Specific Volume Anomaly m^3/kg
Svan Specific Volume Anomaly m^3/kg svan
FreezT Freezing Temperature degC freezing_temperature_of_sea_water degC deg C
FreezT- Freezing Temperature degC freezing_temperature_of_sea_water FreezT
Heading Vessel Heading degrees N platform_orientation
Roll Vessel Roll degrees N platform_roll
Pitch Vessel Pitch degrees N platform_pitch
Heave Vessel Heave meter platform_heave
Track Vessel True Direction degrees N platform_course
GPS GPS Source =1 for POS-MV and =0 for CNAV =1 for POS-MV and =0 for CNAV
Depth Bottom Depth m
pH pH 1 sea_water_ph_reported_on_total_scale
PresPar Pressure Par dbar
time Measurement Time time Generated from Date Time variables
instrument_depth Instrument Depth m depth