Skip to content

dfo.nafc

The Fisheries and Oceans Canada - Newfoundland and Labrador Region - North Atlantic Fisheries Centre (NAFC) is a research facility located in St. John's, Newfoundland and Labrador.

pcnv(path, rename_variables=True, generate_extra_variables=True, global_attributes=None, encoding='UTF-8', encoding_errors='strict', match_metqa_table=False)

DFO NAFC pcnv file format parser

The pcnv format essentially a seabird cnv file format with NAFC specific inputs within the manual section.

Parameters:

Name Type Description Default
path Path

pcvn file path

required
rename_variables bool

Rename variables to DFO BODC names. Defaults to True.

True
generate_extra_variables bool

Generate extra vocabulary variables. Defaults to True.

True
global_attributes dict

Global attributes to add to the dataset.

None
match_metqa_table bool

Match metqa table to the file if available within same directory. Defaults to True.

False

Returns:

Type Description
Dataset

xr.Dataset

Source code in ocean_data_parser/parsers/dfo/nafc.py
def pcnv(
    path: Path,
    rename_variables: bool = True,
    generate_extra_variables: bool = True,
    global_attributes: dict = None,
    encoding: str = "UTF-8",
    encoding_errors: str = "strict",
    match_metqa_table: bool = False,
) -> xr.Dataset:
    """DFO NAFC pcnv file format parser

    The pcnv format  essentially a seabird cnv file format
    with NAFC specific inputs within the manual section.

    Args:
        path (Path): pcvn file path
        rename_variables (bool, optional): Rename variables to
            DFO BODC names. Defaults to True.
        generate_extra_variables (bool, optional): Generate extra
            vocabulary variables. Defaults to True.
        global_attributes (dict, optional): Global attributes to add to the dataset.
        match_metqa_table (bool, optional): Match metqa table to the file if
            available within same directory. Defaults to True.

    Returns:
        xr.Dataset
    """

    def _pop_attribute_from(names: list):
        """Pop attribute from dataset"""
        for name in names:
            if name in ds.attrs:
                return ds.attrs.pop(name)

        if "comment" not in names[0].lower():
            logger.error("No matching attribute found in {}", names)

    def get_vocabulary(**kwargs):
        return p_file_vocabulary.query(
            " and ".join(f"{key} == '{value}'" for key, value in kwargs.items())
        ).to_dict(orient="records")

    path = Path(path)
    ds = seabird.cnv(
        path,
        encoding=encoding,
        encoding_errors=encoding_errors,
        xml_parsing_error_level="WARNING",
    )

    # Map global attributes
    ship_trip_seq_station = re.search(
        r"(?P<dfo_nafc_platform_name>\w{3})(?P<trip>\d{3})_(?P<year>\d{4})_(?P<stn>\d{3})",
        ds.attrs.pop("vessel_trip_seq_stn", ""),
    )
    if not ship_trip_seq_station:
        logger.error(
            "Unable to parse ship_trip_seq_station from VESEL/TRIP/SEQ STN= {}",
            ds.attrs.get("vessel_trip_seq_stn", ""),
        )
    ds.attrs.update(
        {
            "dfo_nafc_platform_name": ship_trip_seq_station["dfo_nafc_platform_name"],
            **_get_platform_by_nafc_platform_name(
                ship_trip_seq_station["dfo_nafc_platform_name"]
            ),
            "trip": _int(ship_trip_seq_station["trip"]),
            "year": _int(ship_trip_seq_station["year"]),
            "station": _int(ship_trip_seq_station["stn"]),
            "time": pd.to_datetime(ds.attrs.pop("date_time"), utc=True),
            "latitude": _parse_lat_lon(
                _pop_attribute_from(
                    ["latitude", "latitude_xx_xx.xx", "latitude_xx_xx.xx_n"]
                )
            ),
            "longitude": _parse_lat_lon(
                _pop_attribute_from(
                    ["longitude", "longitude_xx_xx.xx", "longitude _xx_xx.xx_w"]
                )
            ),
            "sounder_depth": ds.attrs.pop("sounding_depth_m", None),
            "instrument": ds.attrs.pop("probe_type", None),
            "set_number": _int(ds.attrs.pop("xbt_number", None))
            or _int(ds.attrs.pop("ctd_number", None)),
            "format": ds.attrs.pop("format", None),
            "commment": _pop_attribute_from(["comments", "comments_14_char"]),
            # "trip_tag": ds.attrs.pop("trip_tag", None),
            "vnet": ds.attrs.pop("vnet", None),
            "do2": ds.attrs.pop("do2", None),
            "bottles": _int(ds.attrs.pop("bottles", None)),
            **_add_metqa_info_to_pcvn(path, match_metqa_table),
            **(global_attributes or {}),
        }
    )

    # Move coordinates to variables
    coords = ["time", "latitude", "longitude"]
    p_file_vocabulary
    for coord in coords:
        if coord in ds.attrs:
            ds[coord] = ds.attrs[coord]
            ds[coord].attrs = get_vocabulary(variable_name=coord)[0]

    ds = ds.set_coords([coord for coord in coords if coord in ds])

    # Map variable attributes to pfile vocabulary via long_name
    variables_new_name = {}
    for variable in ds.variables:
        if variable in coords:
            continue
        variable_attributes = get_vocabulary(
            long_name=ds[variable].attrs.get("long_name", variable)
        )
        if not variable_attributes:
            logger.warning("Missing vocabulary for variable={}", variable)
            continue

        if rename_variables and variable_attributes[-1].get("variable_name"):
            variables_new_name[variable] = variable_attributes[-1].pop("variable_name")

        ds[variable].attrs.update(variable_attributes[-1])
        if not generate_extra_variables:
            continue

        for extra in variable_attributes[0:-1]:
            new_var = extra.pop("variable_name", variable)
            if new_var == "depth" and "depSM" in ds.variables:
                continue
            logger.debug("Generate extra variable={}", new_var)
            ds[new_var] = (ds[variable].dims, ds[variable].data, extra)

    if rename_variables:
        logger.debug("Rename variables to NAFC standard: {}", variables_new_name)
        ds = ds.rename(variables_new_name)

    ds = standardize_dataset(ds)

    return ds

pfile(file, encoding='UTF-8', rename_variables=True, generate_extra_variables=True, encoding_errors='strict')

Parse DFO NAFC oceanography p-file format

Parameters:

Name Type Description Default
file str

file path

required
encoding str

file encoding. Defaults to "UTF-8".

'UTF-8'
rename_variables bool

Rename variables to BODC standard. Defaults to True.

True
generate_extra_variables bool

Generate extra BODC mapping variables. Defaults to True.

True

Raises:

Type Description
TypeError

File provided isn't a p file.

Returns:

Type Description
Dataset

xr.Dataset: Parser dataset

Source code in ocean_data_parser/parsers/dfo/nafc.py
def pfile(
    file: str,
    encoding: str = "UTF-8",
    rename_variables: bool = True,
    generate_extra_variables: bool = True,
    encoding_errors: str = "strict",
) -> xr.Dataset:
    """Parse DFO NAFC oceanography p-file format

    Args:
        file (str): file path
        encoding (str, optional): file encoding. Defaults to "UTF-8".
        rename_variables (bool, optional): Rename variables to BODC
            standard. Defaults to True.
        generate_extra_variables (bool, optional): Generate extra
            BODC mapping variables. Defaults to True.

    Raises:
        TypeError: File provided isn't a p file.

    Returns:
        xr.Dataset: Parser dataset
    """

    def _parse_ship_trip_stn():
        """Review if the ship,trip,stn string is the same
        accorss the 3 metadata rows and the file name."""
        ship_trip_stn = [line[:8] for line in metadata_lines[1:] if line.strip()] + [
            file.stem
        ]
        if len(set(ship_trip_stn)) != 1:
            logger.warning(
                "Ship,trip,station isn't consistent: {}. "
                "The file name will be considered = {}.",
                set(ship_trip_stn),
                file.stem,
            )
        return dict(
            ship_code=file.stem[:2],
            trip=int(file.stem[2:5]),
            station=int(file.stem[5:8]),
        )

    file = Path(file)
    line = None
    header = {}
    section = None
    with open(file, encoding=encoding, errors=encoding_errors) as file_handle:
        # Read the four first lines to extract the information
        original_header = [file_handle.readline() for _ in range(4)]
        metadata_lines = original_header[:4]
        while file_handle:
            previous_line, line = line, file_handle.readline()
            if "-- DATA --" in line:
                break
            original_header += [line]

            # search section specific
            new_section = re.search(r"-- ([\w\s]+) -->", line)
            section = new_section[1] if new_section else section
            if section and "-- END --" in line:
                section = None
            if section is None:
                continue
            elif section not in header:
                header[section] = []

            header[section] += [line]

        # Define each fields width based on the column names
        names = re.findall(r"\w+", previous_line)
        if len(names) != len(set(names)):
            # Rename duplicated names
            logger.warning("Column names aren't unique: {}", names)
            duplicated_names = []
            for index, name in enumerate(names):
                if names.count(name) > 1:
                    new_name = f"{name}{names[:index].count(name)}"
                    logger.warning("Rename {} to {}", name, new_name)
                    duplicated_names += [new_name]
                else:
                    duplicated_names += [name]

            names = duplicated_names

        # Read data section
        # TODO confirm that 5+12 character width is constant
        ds = pd.read_csv(
            file_handle,
            sep=r"\s+",
            engine="python",
            names=names,
            dtype={name: _get_dtype(name) for name in names},
            encoding_errors=encoding_errors,
        ).to_xarray()

    if len(ds.index) == 0:
        logger.error("No data found in file")

    # Review datatypes
    if any([dtype is object for _, dtype in ds.dtypes.items()]):
        logger.warning(
            "Some columns dtype=object suggest the file data wasn't correctely parsed."
        )

    # Review metadata
    if metadata_lines[0] == "NAFC_Y2K_HEADER":
        raise TypeError(
            "File header doesn't contain pfile first line 'NAFC_Y2K_HEADER'"
        )

    # Convert dataframe to an xarray and populate information
    ds.attrs.update(
        {
            "id": metadata_lines[1][:8],
            **global_attributes,
            **_parse_ship_trip_stn(),
            **(_parse_pfile_header_line1(metadata_lines[1]) or {}),
            **(_parse_pfile_header_line2(metadata_lines[2]) or {}),
            **(_parse_pfile_header_line3(metadata_lines[3]) or {}),
            "history": header.get("HISTORY"),
        }
    )
    ds.attrs["original_header"] = "\n".join(original_header)
    ds.attrs["history"] = _pfile_history_to_cf(header.get("HISTORY"))
    ds.attrs.update(_get_platform_by_nafc_platform_code(ds.attrs.get("ship_code", {})))

    # Move coordinates to variables:
    coords = ["time", "latitude", "longitude"]
    for coord in coords:
        if coord in ds.attrs:
            ds[coord] = ds.attrs[coord]
    ds = ds.set_coords([coord for coord in coords if coord in ds])

    # Populate variable attributes base on vocabulary
    variables_span = _parse_channel_stats(header.get("CHANNEL STATS"))
    extra_vocabulary_variables = []
    for var in ds.variables:
        ds[var].attrs.update(variables_span.get(var, {}))
        variable_attributes = _get_pfile_variable_vocabulary(
            var, ds.attrs.get("instrument")
        )
        if not variable_attributes:
            continue

        ds[var].attrs.update(variable_attributes[0])
        for extra in variable_attributes[1:]:
            extra_vocabulary_variables += [
                [
                    extra.pop("variable_name", var),
                    ds[var],
                    extra,
                ]
            ]

    # Rename variables
    if rename_variables:
        ds = ds.rename(
            {
                var: ds[var].attrs.pop("variable_name")
                for var in ds.variables
                if "variable_name" in ds[var].attrs
                and ds[var].attrs["variable_name"]
                and ds[var].attrs["variable_name"] != var
            }
        )

    # Generate extra variables
    if generate_extra_variables:
        for name, var, attrs in extra_vocabulary_variables:
            if name in ds:
                logger.info(
                    (
                        "Extra variable is already in dataset and will be ignored. "
                        "name={}, attrs={} is already in dataset and will be ignored"
                    ),
                    var.name,
                    attrs,
                )
                continue
            apply_func = attrs.pop("apply_func", None)
            new_data = (
                eval(
                    apply_func,
                    {},
                    {
                        "gsw": gsw,
                        **{
                            ds[variable].attrs.get("legacy_p_code", variable): ds[
                                variable
                            ]
                            for variable in ds.variables
                        },
                    },
                )
                if apply_func not in (None, np.nan)
                else var
            )
            ds.attrs["history"] += (
                f"\n{pd.Timestamp.now()} - Generated variable {name} = {apply_func}"
            )
            attrs["source"] = f"Generated variable {name} = {apply_func}"
            ds[name] = (var.dims, new_data.data, {**var.attrs, **attrs})

    # standardize
    ds = standardize_dataset(ds)

    return ds

p-files vocabulary

legacy_p_code accepted_instruments apply_func seabird_name variable_name long_name units sdn_uom_urn sdn_uom_name instrument comments sdn_parameter_urn sdn_parameter_name bodc_alternative_label definition standard_name coverage_content_type
cond c0S/m CNDCST01 Conductivity S/m SDN:P06::UECA Siemens per metre 1st sensor SDN:P01::CNDCST01 Electrical conductivity of the water body by CTD CTDCond sea_water_electrical_conductivity physicalMeasurement
cond2 c1S/m CNDCST02 Conductivity, 2 S/m SDN:P06::UECA Siemens per metre 2nd sensor SDN:P01::CNDCST02 Electrical conductivity of the water body by CTD (sensor 2) CTDCond2 sea_water_electrical_conductivity physicalMeasurement
flor CPHLPR01 Fluorescence mg/m^3 SDN:P06::UMMC Milligrams per cubic metre 1st sensor SDN:P01::CPHLPR01 Concentration of chlorophyll-a {chl-a CAS 479-61-8} per unit volume of the water body [particulate >unknown phase] by in-situ chlorophyll fluorometer chl-a_water_ISfluor In-situ fluorometer with either manufacturer, laboratory or sample calibration applied. mass_concentration_of_chlorophyll_a_in_sea_water physicalMeasurement
flor WET Labs ECO-AFL/FL flECO-AFL Fluorescence, Wet Labs Eco-Afl/Fl mg/m^3 SDN:P06::UMMC Milligrams per cubic metre WET Labs ECO-AFL/FL 1st sensor mass_concentration_of_chlorophyll_a_in_sea_water physicalMeasurement
flor Chelsea Aqua 3 Chl Con flC Fluorescence, Chelsea Aqua 3 Chl Con mg/m^3 SDN:P06::UGPL Micrograms per litre Chelsea Aqua 3 Chl Con 1st sensor mass_concentration_of_chlorophyll_a_in_sea_water physicalMeasurement
oxy sbeox0ML/L DOXYSU01 Oxygen, SBE 43 ml/l SDN:P06::UMLL Millilitres per litre SBE 43 1st sensor SDN:P01::DOXYSU01 Concentration of oxygen {O2 CAS 7782-44-7} per unit volume of the water body [dissolved plus reactive particulate phase] by Sea-Bird SBE 43 sensor and no calibration against sample data WC_dissO2_uncalib Sea-Bird SBE 43 oxygen sensor with no field calibration against sample data volume_fraction_of_oxygen_in_sea_water physicalMeasurement
oxy sbeox0ML/L DOXYZZ01 Oxygen, SBE 43 ml/l SDN:P06::UMLL Millilitres per litre SBE 43 1st sensor SDN:P01::DOXYZZ01 Concentration of oxygen {O2 CAS 7782-44-7} per unit volume of the water body [dissolved plus reactive particulate phase] by in-situ sensor WC_dissO2_IS Unspecified type of oxygen sensor with no information on whether it has been calibrated against sample data volume_fraction_of_oxygen_in_sea_water physicalMeasurement
oxy2 sbeox1ML/L DOXYSU02 Oxygen, SBE 43, 2 ml/l SDN:P06::UMLL Millilitres per litre SBE 43 2nd sensor SDN:P01::DOXYSU02 Concentration of oxygen {O2 CAS 7782-44-7} per unit volume of the water body [dissolved plus reactive particulate phase] by Sea-Bird SBE 43 sensor (second sensor) and no calibration against sample data WC_dissO2_uncalib2 Secondary Sea-Bird SBE 43 oxygen sensor with no field calibration against sample data volume_fraction_of_oxygen_in_sea_water physicalMeasurement
oxy2 sbeox1ML/L DOXYZZ02 Oxygen, SBE 43, 2 ml/l SDN:P06::UMLL Millilitres per litre SBE 43 2nd sensor SDN:P01::DOXYZZ02 Concentration of oxygen {O2 CAS 7782-44-7} per unit volume of the water body [dissolved plus reactive particulate phase] by in-situ sensor WC_dissO2_IS_2 Unspecified type of oxygen sensor with no information on whether it has been calibrated against sample data volume_fraction_of_oxygen_in_sea_water physicalMeasurement
oxypsat oxsatML/L OXYSZZ01 Oxygen Saturation, Weiss ml/l SDN:P06::UMLL Millilitres per litre Weiss SDN:P01::OXYSZZ01 Saturation of oxygen {O2 CAS 7782-44-7} in the water body [dissolved plus reactive particulate phase] O2Sat Computed (algorithm/data source unspecified) physicalMeasurement
sbeox0V sbeox0V Oxygen Raw, SBE 43 V SDN:P06::UVLT Volts SBE 43 1st sensor physicalMeasurement
sbeox1V sbeox1V Oxygen Raw, SBE 43, 2 V SDN:P06::UVLT Volts SBE 43 2nd sensor physicalMeasurement
par light par DWIRRXUD Par/Irradiance, Biospherical/Licor µeinsteins/s/m^2 downwelling_photon_spherical_irradiance_in_sea_water Biospherical/Licor Biospherical, Licor, or Chelsea sensor; 1st sensor SDN:P01::DWIRRXUD Downwelling vector irradiance as energy of electromagnetic radiation (PAR wavelengths) in the water body by cosine-collector radiometer SubSurDWVectPAR downwelling_photon_spherical_irradiance_in_sea_water
par Biospherical/Licor par DWIRRXUD Par/Irradiance, Biospherical/Licor µeinsteins/s/m^2 downwelling_photon_spherical_irradiance_in_sea_water Biospherical/Licor Biospherical, Licor, or Chelsea sensor; 1st sensor SDN:P01::DWIRRXUD Downwelling vector irradiance as energy of electromagnetic radiation (PAR wavelengths) in the water body by cosine-collector radiometer SubSurDWVectPAR downwelling_photon_spherical_irradiance_in_sea_water physicalMeasurement
par Biospherical/Satlantic par DWIRRXUD PAR/Logarithmic, Satlantic µeinsteins/s/m^2 downwelling_photon_spherical_irradiance_in_sea_water Biospherical/Licor Biospherical, Licor, or Chelsea sensor; 1st sensor SDN:P01::DWIRRXUD Downwelling vector irradiance as energy of electromagnetic radiation (PAR wavelengths) in the water body by cosine-collector radiometer SubSurDWVectPAR downwelling_photon_spherical_irradiance_in_sea_water physicalMeasurement
ph ph PHXXZZXX pH SDN:P06::UUPH pH units SDN:P01::PHXXZZXX pH (unspecified scale) of the water body pH Minus the log of the hydrogen ion concentration (moles per litre) in the water column. sea_water_ph_reported_on_total_scale physicalMeasurement
pres prDM PRESPR01 Pressure, Strain Gauge dbar SDN:P06::UPBL Decibar Digiquartz Digiquartz pressure sensor SDN:P01::PRESPR01 Pressure (spatial coordinate) exerted by the water body by profiling pressure sensor and correction to read zero at sea level Pres_Z The force per unit area exerted by the water column on a mobile sensor located in the water column. sea_water_pressure physicalMeasurement
pres -gsw.z_from_p(pres,latitude) depth depth Depth [salt water] m SDN:P06::ULAA Metres depth coordinate
depth depth depth Depth [salt water] m SDN:P06::ULAA Metres depth coordinate
sal sal00 PSALST01 Salinity, Practical PSU SDN:P06::UUUU Dimensionless 1st sensor SDN:P01::PSALST01 Practical salinity of the water body by CTD and computation using UNESCO 1983 algorithm P_sal_CTD This is the preferred term for this definition. Alternative term PSALST02 is included to cover cases where there are two sensors of the same type contributing to the data set and referential integrity considerations prevent a usage of a single code. sea_water_practical_salinity physicalMeasurement
sal2 sal11 PSALST02 Salinity, Practical, 2 PSU SDN:P06::UUUU Dimensionless 2nd sensor SDN:P01::PSALST02 Practical salinity of the water body by CTD (second sensor) and computation using UNESCO 1983 algorithm P_sal_CTD2 This is the alternative term for this definition. Only use to cover cases where there are two sensors of the same type contributing to the data set and referential integrity considerations prevent a usage of a single code. sea_water_practical_salinity physicalMeasurement
scan scan scan Scan Count physicalMeasurement
sig sigma-t00 SIGTEQST Density kg/m^3 SDN:P06::UKMC Kilograms per cubic metre 1st sensor SDN:P01::SIGTEQST Sigma-T of the water body by computation from salinity and temperature using UNESCO algorithm Sigma-T Computed by UNESCO SVAN function using in-situ temperature sea_water_sigma_t physicalMeasurement
sig2 sigma-t11 SIGTEQST2 Density 2 [sigma-t] kg/m^3 SDN:P06::UKMC Kilograms per cubic metre 2nd sensor SDN:P01::SIGTEQST Sigma-T of the water body by computation from salinity and temperature using UNESCO algorithm Sigma-T Computed by UNESCO SVAN function using in-situ temperature sea_water_sigma_t physicalMeasurement
temp t090C TEMPS901 Temperature [ITS-90] degrees_celsius SDN:P06::UPAA Degrees Celsius 1st sensor SDN:P01::TEMPS901 Temperature (ITS-90) of the water body by CTD or STD CTDTmp90 sea_water_temperature physicalMeasurement
temp2 t190C TEMPS902 Temperature, 2 degrees_celsius SDN:P06::UPAA Degrees Celsius 2nd sensor SDN:P01::TEMPS902 Temperature (ITS-90) of the water body by CTD or STD (second sensor) CTDTmp90_2 sea_water_temperature physicalMeasurement
tra CStarAt0 ATTNZS01 Beam Attenuation, Wet Labs C-Star 1/m SDN:P06::UPRM per metre WET Labs C-Star 1st sensor SDN:P01::ATTNZS01 Attenuation (red light wavelength) per unit length of the water body by WET Labs transmissometer and calibration to read zero in clear water RedPWCorr WET Labs transmissometer calibrated to zero in clear water volume_beam_attenuation_coefficient_of_radiative_flux_in_sea_water physicalMeasurement
trp trans CStarTr0 OPTCPS01 Beam Transmission, Wet Labs C-Star % SDN:P06::UPCT Percent WET Labs C-Star 1st sensor SDN:P01::OPTCPS01 Transmittance (red light wavelength) per unit length of the water body TransmRedLight
wet wetCDOM CCOMD002 Fluorescence, Wet Labs CDOM mg/m^3 SDN:P06::UMMC Milligrams per cubic metre WET Labs CDOM 1st sensor SDN:P01::CCOMD002 Concentration of coloured dissolved organic matter {CDOM Gelbstoff} per unit volume of the water body [dissolved plus reactive particulate phase] by in-situ WET Labs FDOM ECO fluorometer CDOM_fluor_WETLabs Quantity or mass of the specified analyte in a unit volume of an unfiltered sample of fresh or salt water. concentration_of_colored_dissolved_organic_matter_in_sea_water_expressed_as_equivalent_mass_fraction_of_quinine_sulfate_dihydrate physicalMeasurement
sigt SIGTEQST Sigma-T kg/m^3 SDN:P06::UKMC Kilograms per cubic metre SDN:P01::SIGTEQST Sigma-T of the water body by computation from salinity and temperature using UNESCO algorithm Sigma-T Computed by UNESCO SVAN function using in-situ temperature sea_water_sigma_t physicalMeasurement
sigmat SIGTEQST Sigma-T kg/m^3 SDN:P06::UKMC Kilograms per cubic metre SDN:P01::SIGTEQST Sigma-T of the water body by computation from salinity and temperature using UNESCO algorithm Sigma-T Computed by UNESCO SVAN function using in-situ temperature sea_water_sigma_t physicalMeasurement
time time Time time coordinate
latitude latitude Latitude degrees_north SDN:P06::DEGN Degrees north latitude coordinate
longitude longitude Longitude degrees_east SDN:P06::DEGN Degrees east longitude coordinate
index index index auxiliaryInformation
flag flag Flag qualityInformation