Skip to content

amundsen

Amundsen

https://arcticnet.ulaval.ca/ https://amundsenscience.com/

Historically ArcticNet and Amundsen Science.

int_format(path, encoding='Windows-1252', map_to_vocabulary=True, generate_depth=True)

Parse Amundsen INT format.

The Amundsen INT format is a tabular format

Parameters:

Name Type Description Default
path str

file path to parse.

required
encoding str

File encoding. Defaults to "Windows-1252".

'Windows-1252'
map_to_vocabulary bool

Rename variables to vocabulary. Defaults to True.

True
generate_depth bool

Generate depth variable. Defaults to True.

True

Returns:

Type Description
xr.Dataset

xr.Dataset: xarray dataset compliant with CF-1.6

Source code in ocean_data_parser/parsers/amundsen.py
def int_format(
    path: str,
    encoding: str = "Windows-1252",
    map_to_vocabulary: bool = True,
    generate_depth: bool = True,
) -> xr.Dataset:
    """Parse Amundsen INT format.

    The Amundsen INT format is a tabular text format. As read by this parser,
    a file is made of:

    1. a header of ``%``-prefixed lines, mostly ``% key: value`` pairs, which
       become global attributes or per-column ``long_name [units]``
       descriptions;
    2. a column-name line (first non-``%`` line starting with a space);
    3. a delimiter line made of dashes, whose segment widths give the
       fixed-width positions of the column names;
    4. whitespace-separated data rows.

    Args:
        path (str): file path to parse.
        encoding (str, optional): File encoding. Defaults to "Windows-1252".
        map_to_vocabulary (bool, optional): Rename variables to vocabulary.
            Defaults to True.
        generate_depth (bool, optional): Generate depth variable. Defaults to
            True. The depth is only generated when a pressure variable
            (``Pres``/``PRES``) and a latitude (``Lat`` column or
            ``initial_latitude_deg`` global attribute) are available.

    Returns:
        xr.Dataset: xarray compliant with CF-1.6. ``None`` is returned for
        ``*_info.int`` files, which are ignored.

    Raises:
        ValueError: If no column-name line is found after the header.
    """
    # Capture the log records emitted during parsing so that they can be
    # appended to the dataset ``history`` attribute at the end.
    nc_logger, nc_handler = get_history_handler()
    logger.addHandler(nc_handler)

    logger.info(
        "Convert INT file format with python package ocean_data_parser.amundsen.int_format V%s",
        __version__,
    )
    metadata = default_global_attributes.copy()

    # Ignore info.int files
    if path.endswith("_info.int"):
        logger.warning("Ignore *_info.int files: %s", path)
        return None

    logger.debug("Read %s", path)
    with open(path, encoding=encoding) as file:
        # Parse header
        for line in file:
            line = line.replace("\n", "")
            if re.match(r"^%\s*$", line) or not line:
                # Empty line or lone "%": nothing to retain
                continue
            elif line and not re.match(r"\s*%", line) and line[0] == " ":
                # First non-header line starting with a space: column names
                column_name_line = line
                break
            elif ":" in line:
                # "% key: value" -> drop the leading character and split on
                # the first colon only (values may themselves contain ":")
                key, value = line.strip()[1:].split(":", 1)
                metadata[key.strip()] = value.strip()
            elif re.match(r"% .* \[.+\]", line):
                logger.warning(
                    "Unknown variable name will be saved to unknown_variables_information: %s",
                    line,
                )
                metadata["unknown_variables_information"] += line + "\n"

            else:
                logger.warning("Unknown line format: %s", line)
        else:
            # The loop reached the end of the file without finding the
            # column-name line: fail with an explicit message rather than an
            # unbound-variable error further down.
            raise ValueError(
                f"No column name line found in INT file: {path}"
            )

        # Review metadata
        if metadata == default_global_attributes:
            logger.warning("No metadata was captured in the header of the INT file.")

        # Parse column header: the line right below the column names is
        # expected to contain only dashes and whitespace
        delimiter_line = file.readline()
        if not re.match(r"^[\s\-]+$", delimiter_line):
            logger.error("Delimiter line below the column names isn't the expected one")

        # Parse column names based on delimiter line below: each
        # "<spaces><dashes>" segment gives the width of one column
        delimited_segments = re.findall(r"\s*\-+", delimiter_line)
        start_segment = 0
        column_names = []
        for segment in delimited_segments:
            end_segment = start_segment + len(segment)
            column_names.append(column_name_line[start_segment:end_segment].strip())
            start_segment = end_segment

        # Parse data
        df = pd.read_csv(
            file,
            sep=r"\s+",
            names=column_names,
        )

        # Sort column attributes: header entries whose key matches a column
        # name (first letter upper-cased) are moved from the global metadata
        # to that column's attributes as "long_name [units]"
        variables = {
            column: re.search(
                r"(?P<long_name>[^\[]+)(\[(?P<units>.*)\]){0,1}",
                metadata.pop(column[0].upper() + column[1:]),
            ).groupdict()
            if column[0].upper() + column[1:] in metadata
            else {}
            for column in df
        }
        if "Date" in df and "Hour" in df:
            # Times ending with ":60" seconds can't be parsed directly:
            # replace by ":00" and add the 60 seconds back once parsed.
            is_60 = df["Hour"].str.contains(":60$")
            df.loc[is_60, "Hour"] = df.loc[is_60, "Hour"].str.replace(
                ":60$", ":00", regex=True
            )
            df["time"] = pd.to_datetime(df["Date"] + "T" + df["Hour"], utc=True)
            df.loc[is_60, "time"] += pd.Timedelta(seconds=60)

        # Convert to xarray object
        ds = df.to_xarray()

        # Standardize global attributes
        metadata = {
            _standardize_attribute_name(name): _standardize_attribute_value(
                value, name=name
            )
            for name, value in metadata.items()
        }
        ds.attrs = metadata

        # Generate instrument_depth variable
        pressure = [var for var in ds if var in ("Pres", "PRES")]
        if (
            generate_depth
            and pressure
            and ("Lat" in ds or "initial_latitude_deg" in ds.attrs)
        ):
            # Prefer the per-record latitude column, otherwise fall back on
            # the initial latitude given in the header.
            latitude = ds["Lat"] if "Lat" in ds else ds.attrs["initial_latitude_deg"]
            logger.info(
                "Generate instrument_depth from TEOS-10: -1 * gsw.z_from_p(ds['Pres'], %s)",
                "ds['Lat']" if "Lat" in ds else "ds.attrs['initial_latitude_deg']",
            )
            ds["instrument_depth"] = -z_from_p(ds[pressure[0]], latitude)

        # Map variables to vocabulary
        variables_to_rename = {}
        for var in ds:
            if var not in variables:
                continue

            ds[var].attrs = variables[var]
            if "long_name" in ds[var].attrs:
                ds[var].attrs["long_name"] = ds[var].attrs["long_name"].strip()

            # Include variable attributes from the vocabulary
            if not map_to_vocabulary:
                continue
            elif var not in amundsen_variable_attributes:
                logger.warning("No vocabulary is available for variable '%s'", var)
                continue

            # Match vocabulary: use the first entry whose units (or accepted
            # units pattern) match the units found in the file header
            var_units = ds[var].attrs.get("units")
            for item in amundsen_variable_attributes[var]:
                accepted_units = item.get("accepted_units")
                if (
                    var_units is None  # Consider first if no units
                    or var_units == item.get("units")
                    or (accepted_units and re.match(accepted_units, var_units))
                ):
                    if "rename" in item:
                        variables_to_rename[var] = item["rename"]

                    ds[var].attrs = {
                        key: value
                        for key, value in item.items()
                        if key not in ["accepted_units", "rename"]
                    }
                    break
            else:
                logger.warning(
                    "No Vocabulary available for %s: %s", var, str(ds[var].attrs)
                )

        # Review rename variables
        already_existing_variables = {
            var: rename for var, rename in variables_to_rename.items() if rename in ds
        }
        if already_existing_variables:
            logger.error(
                "Can't rename variable %s since it already exist",
                already_existing_variables,
            )

        if variables_to_rename:
            logger.info("Rename variables: %s", variables_to_rename)
            ds = ds.rename(variables_to_rename)

        # Generate history
        ds.attrs["history"] += nc_logger.getvalue()

        # Standardize dataset to be compatible with ERDDAP and NetCDF Classic
        ds = standardize_dataset(ds)
        return ds

Vocabulary

Amundsen Name long_name units standard_name accepted_units rename comments source
Net Net Number
Pres_open Net Open Pressure db
Pres_close Net Close Pressure db
Duration Sample Duration s
Volume Sample Volume m^3
Tem_mean Mean Temperature (ITS-90) degC sea_water_temperature
Tem_std Standard Deviation Temperature (ITS-90) degC sea_water_temperature
Sal_mean Mean Salinity (PSS-78) 1e-3 sea_water_practical_salinity PSU psu 1e-3
Sal_std Standard Deviation Salinity (PSS-78) 1e-3 sea_water_practical_salinity PSU psu 1e-3
Pres Sea Pressure (Sea Surface - 0) decibars sea_water_pressure_due_to_sea_water
Temp Temperature (ITS-90) degC sea_water_temperature degC deg C
Sal Practical Salinity (PSS-78) 1e-3 sea_water_practical_salinity PSU psu 1e-3
Date Acquisition Date yyyymmdd
Date Acquisition Date yyyy/mm/dd
Hour Acquisition Hour HHMMSS
Hour Acquisition Hour HH:MM:SS
Lat Latitude degrees N latitude
Long Longitude degrees E longitude
Wind dir True Wind Direction degrees N wind_from_direction
Wind speed True Wind Speed Knt wind_speed
Air temp Air Temperature degC air_temperature
Dew point Dew Point degC dew_point_temperature
Pressure Atmospheric Pressure hPa air_pressure
Humidity Air Humidity %
SV Sound Velocity m/s speed_of_sound_in_sea_water
Vel Sound Velocity From Velocity Sensor m/s speed_of_sound_in_sea_water
Trans Light Transmission %
Fluo Fluorescence ug/L mass_concentration_of_chlorophyll_in_sea_water ug/L mg/m\^3
O2 Dissolved Oxygen ml/l volume_fraction_of_oxygen_in_sea_water
O2 Dissolved Oxygen µM mole_concentration_of_dissolved_molecular_oxygen_in_sea_water O2M
ASAL Absolute Salinity (TEOS-10) g/kg sea_water_absolute_salinity
Asal Absolute Salinity TEOS-10 g/kg sea_water_absolute_salinity ASAL
CONT Conservative Temperature (TEOS-10) degC sea_water_conservative_temperature
Cont Conservative Temperature TEOS-10 degC sea_water_conservative_temperature CONT
D_CT In Situ Density TEOS-10 ((S, T, P) - 1000) kg/m^3 sea_water_sigma_t
D0CT Potential Density TEOS-10 ((S, T, 0) - 1000) kg/m^3 sea_water_sigma_theta
Tem-TSG Temperature From Thermo-Salinometer degrees_celcius sea_surface_temperature ¡c
Sal-TSG Salinity From Thermo-Salinometer 1e-3 sea_surface_practical_salinity PSU psu 1e-3
Speed Vessel Speed Knt platform_speed
Speed Vessel True Speed Knt platform_speed
Tem-Water Temperature From Sensor SBE38 degrees_celcius sea_water_temperature °c °c ¡c
Flow Water Flow Rate L/min
Dens Sigma-T (Rho (S, T, 0) - 1000) kg/m^3 sea_water_sigma_t
N2 Brunt-Vaisala Frequency s^-2 square_of_brunt_vaisala_frequency_in_sea_water
Sigt Sigma-T (Rho(S, T, 0)-1000) kg/m^3 sea_water_sigma_t sigt
sigt Density TEOS-10 ((S, T, P) - 1000) kg/m^3 sea_water_sigma_t
D_ct Density TEOS-10 ((S, T, P) - 1000) kg/m^3 sea_water_sigma_t
Theta Potential Temperature degC sea_water_potential_temperature theta
theta Potential Temperature degC sea_water_potential_temperature
sigthe Sigma-Theta (Rho(S, Theta, 0)-1000) kg/m^3 sea_water_sigma_theta
Sigthe Sigma-Theta (Rho(S, Theta, 0)-1000) kg/m^3 sea_water_sigma_theta sigthe
D0ct Density TEOS-10 (S, T, 0) kg/m^3 sea_water_sigma_theta
CDOM Fluorescence Of CDOM (Colored Dissolved Organic Matter) mg/m^3 concentration_of_colored_dissolved_organic_matter_in_sea_water_expressed_as_equivalent_mass_fraction_of_quinine_sulfate_dihydrate
Cdom Fluorescence Of CDOM (Colored Dissolved Organic Matter) mg/m^3 concentration_of_colored_dissolved_organic_matter_in_sea_water_expressed_as_equivalent_mass_fraction_of_quinine_sulfate_dihydrate CDOM
NO3 Nitrate (NO3-N) Content mmol/m^3 mole_concentration_of_nitrate_in_sea_water
Par Photosynthetic Active Radiation uEinsteins/s/m^2 downwelling_photosynthetic_photon_flux_in_sea_water µeinsteins/s/m\^2 µeinsteins/s/m\^2
SPar Surface Photosynthetic Active Radiation uEinsteins/s/m^2 surface_downwelling_photosynthetic_photon_flux_in_sea_water µeinsteins/s/m\^2 µeinsteins/s/m\^2
svan Specific Volume Anomaly m^3/kg
Svan Specific Volume Anomaly m^3/kg svan
FreezT Freezing Temperature degC freezing_temperature_of_sea_water
FreezT- Freezing Temperature degC freezing_temperature_of_sea_water FreezT
Heading Vessel Heading degrees N platform_orientation
Roll Vessel Roll degrees N platform_roll
Pitch Vessel Pitch degrees N platform_pitch
Heave Vessel Heave meter platform_heave
Track Vessel True Direction degrees N platform_course
GPS GPS Source =1 for POS-MV and =0 for CNAV =1 for POS-MV and =0 for CNAV
Depth Bottom Depth m
pH pH 1 sea_water_ph_reported_on_total_scale
PresPar Pressure Par dbar
time Measurement Time time Generated from Date Time variables
instrument_depth Instrument Depth m depth