def csv(
    path: str,
    encoding: str = "utf-8",
) -> xarray.Dataset:
    """ElectricBlue csv data format parser.

    The file is read in two stages:

    1. Header lines are consumed until the row starting with ``time,``
       (the column header row). Each header line is split once on ``", "``
       into a key and a value. The key is lower-cased and runs of
       whitespace, brackets and parentheses are replaced by ``_`` to form
       the attribute name. Values that look like an integer or a plain
       decimal number are cast to ``int`` / ``float``; everything else is
       kept as a string. Separator lines (only ``-``, ``,`` and whitespace)
       and blank lines are ignored.
    2. The remaining rows are parsed with ``pandas.read_csv`` using the
       column names found on the ``time,`` row. The ``time_zone`` header
       value is appended to every value of the first column before it is
       converted to a UTC timestamp.

    Args:
        path (str): path to the csv file to parse
        encoding (str='utf-8', optional): file encoding

    Returns:
        dataset: xarray dataset with the header stored as global attributes

    Raises:
        KeyError: if ``time_zone``, ``samples``, ``lat``, ``long`` or
            ``temperature`` is not available from the file header (or from
            the module default global attributes).
    """
    # Work on a copy so the module-level defaults are never mutated and
    # nothing leaks from one parsed file into the next.
    metadata = dict(default_global_attribute)
    metadata["source_file"] = path

    # Raw header lines are gathered here and joined once after the loop.
    header_lines = []
    line = ""

    with open(path, encoding=encoding) as f:
        while True:
            raw_line = f.readline()
            if not raw_line:
                # End of file reached before any "time," row was found.
                line = ""
                break
            header_lines.append(raw_line)
            line = raw_line.strip()

            # Skip blank lines and separator lines such as "-----, -----".
            if not line or re.match(r"^[-,\s]+$", line):
                continue
            # The column header row marks the end of the metadata section.
            if line.startswith("time,"):
                break

            items = line.split(", ", 1)
            key = items[0]
            value = items[1] if len(items) == 2 else ""
            attr = re.sub(r"[\s\[\]\(\)]+", "_", key.lower())

            # Cast numeric values; at most one leading sign is accepted so
            # that int()/float() cannot fail on input such as "--5".
            if re.match(r"^[+-]?\d+$", value):
                value = int(value)
            elif re.match(r"^[+-]?\d+\.\d+$", value):
                value = float(value)
            metadata[attr] = value

        metadata["source_file_header"] = metadata.get(
            "source_file_header", ""
        ) + "".join(header_lines)

        columns = line.split(",")
        time_zone = metadata.pop("time_zone")

        # The file handle is positioned on the first data row at this point.
        df = pd.read_csv(
            f,
            sep=",",
            header=None,
            names=columns,
            converters={0: lambda x: pd.to_datetime(x + time_zone, utc=True)},
        )
        if len(df) != metadata["samples"]:
            logger.warning(
                "Parsed data samples=%s do not match expected samples=%s",
                str(len(df)),
                metadata["samples"],
            )

    # Convert to xarray dataset
    ds = df.to_xarray()

    # Global attributes
    ds.attrs = metadata
    ds.attrs.update(
        {
            "instrument_type": ds.attrs.get("envlogger_version"),
            "instrument_sn": ds.attrs.get("serial_number"),
        }
    )
    ds["latitude"] = ds.attrs["lat"]
    ds["longitude"] = ds.attrs["long"]

    # Variables attributes
    for var in ds:
        if var in default_variable_attributes:
            # Copy so that per-file edits (e.g. the units set below) do not
            # alter the shared default attributes.
            ds[var].attrs = dict(default_variable_attributes[var])
    ds["temp"].attrs["units"] = ds.attrs.pop("temperature")

    ds = standardize_dataset(ds)
    return ds