Unidata/MetPy

View on GitHub
src/metpy/io/station_data.py

Summary

Maintainability
A
0 mins
Test Coverage
# Copyright (c) 2019 MetPy Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
"""Pull out station metadata."""
from collections import ChainMap, namedtuple
from collections.abc import Mapping
from functools import cached_property

import numpy as np
import pandas as pd

from ..cbook import get_test_data
from ..package_tools import Exporter
from ..units import units

# Exporter bound to this module's globals; it is used further down as the
# ``@exporter.export`` decorator and the ``with exporter:`` context (presumably to
# register this module's public names -- confirm against package_tools.Exporter).
exporter = Exporter(globals())
# Record for a single station entry. ``source`` holds the path of the file the entry was
# read from.
Station = namedtuple('Station', ['id', 'synop_id', 'name', 'state', 'country',
                                 'longitude', 'latitude', 'altitude', 'source'])


def to_dec_deg(dms):
    """Convert a degrees/minutes/hemisphere string to signed decimal degrees.

    Parameters
    ----------
    dms : str
        Text of the form ``'DD MMH'``: whole degrees, whitespace, then whole minutes
        immediately followed by a one-letter hemisphere (e.g. ``'39 51N'``). An empty
        value is treated as missing.

    Returns
    -------
    float
        Decimal degrees; positive for hemispheres ``'N'`` and ``'E'``, negative for any
        other hemisphere letter. ``0.`` when `dms` is empty.

    Raises
    ------
    ValueError
        If `dms` does not split into exactly two fields, or if the degrees or minutes
        are not integers.
    """
    if not dms:
        return 0.
    deg, minutes = dms.split()
    # The final character is the hemisphere; everything before it is the minutes value.
    # Only that last character is removed (instead of assuming exactly two digits) so
    # un-padded minutes such as '5N' still parse.
    side = minutes[-1]
    minutes = minutes[:-1]
    float_deg = int(deg) + int(minutes) / 60.
    return float_deg if side in ('N', 'E') else -float_deg


def _read_station_table(input_file=None):
    """Parse the fixed-width GEMPAK station table.

    Every line of the file is treated as one station record. Latitude and longitude are
    stored in the table as integers and are divided by 100 here; the station name is
    title-cased.

    Parameters
    ----------
    input_file : optional
        File to read, passed to `open`. When omitted, ``sfstns.tbl`` as located by
        `get_test_data` is used.

    Yields
    ------
    tuple
        ``(station_id, Station)`` for each line.
    """
    if input_file is None:
        input_file = get_test_data('sfstns.tbl', as_file_obj=False)

    def column(record, start, stop):
        # One fixed-width field with surrounding padding removed.
        return record[start:stop].strip()

    with open(input_file) as table:
        for record in table:
            station_id = column(record, 0, 9)
            synop = int(column(record, 9, 16))
            title = column(record, 16, 49).title()
            region = column(record, 49, 52)
            nation = column(record, 52, 55)
            latitude = int(column(record, 55, 61)) / 100.
            longitude = int(column(record, 61, 68)) / 100.
            elevation = int(column(record, 68, 74))
            entry = Station(id=station_id, synop_id=synop, name=title, state=region,
                            country=nation, longitude=longitude, latitude=latitude,
                            altitude=elevation, source=input_file)
            yield station_id, entry


def _read_master_text_file(input_file=None):
    """Read in the master text file.

    The first line of the file is skipped; every following line is parsed as one
    fixed-width station record. Underscores in the station name are replaced by spaces
    and the name is title-cased. The synop ID is kept as text, and a blank altitude
    field is read as 0.

    Parameters
    ----------
    input_file : optional
        File to read, passed to `open`. When omitted, ``master.txt`` as located by
        `get_test_data` is used.

    Yields
    ------
    tuple
        Station ID and `Station` for each entry.
    """
    if input_file is None:
        input_file = get_test_data('master.txt', as_file_obj=False)
    with open(input_file) as station_file:
        # Discard the first line before parsing records.
        station_file.readline()
        for line in station_file:
            state = line[:3].strip()
            name = line[3:20].strip().replace('_', ' ')
            stid = line[20:25].strip()
            synop_id = line[32:38].strip()
            lat = to_dec_deg(line[39:46].strip())
            lon = to_dec_deg(line[47:55].strip())
            alt_part = line[55:60].strip()
            alt = int(alt_part) if alt_part else 0
            if stid.startswith(('P', 'K')):
                country = 'US'
            elif stid:
                # For other IDs the leading column is used as the country code and the
                # state is replaced by a placeholder.
                country = state
                state = '--'
            else:
                # No station ID, so the prefix rule cannot be applied. Give this record
                # its own value rather than reusing the previous line's country (or
                # failing with an unbound name on the first such line).
                # NOTE(review): '' is used here as an "unknown country" marker -- confirm
                # this is the preferred placeholder.
                country = ''
            yield stid, Station(stid, synop_id=synop_id, name=name.title(), latitude=lat,
                                longitude=lon, altitude=alt, country=country, state=state,
                                source=input_file)


def _read_station_text_file(input_file=None):
    """Parse the fixed-width station text file.

    Lines beginning with ``!`` are skipped, as are lines whose latitude field is blank
    or contains the column heading ``LAT``. Underscores in the station name are replaced
    by spaces and the name is title-cased; the synop ID is kept as text.

    Parameters
    ----------
    input_file : optional
        File to read, passed to `open`. When omitted, ``stations.txt`` as located by
        `get_test_data` is used.

    Yields
    ------
    tuple
        ``(station_id, Station)`` for each station line.
    """
    if input_file is None:
        input_file = get_test_data('stations.txt', as_file_obj=False)
    with open(input_file) as listing:
        for row in listing:
            if row.startswith('!'):
                continue
            lat_text = row[39:45].strip()
            if lat_text in ('', 'LAT'):
                continue
            latitude = to_dec_deg(lat_text)
            region = row[:3].strip()
            title = row[3:20].strip().replace('_', ' ').title()
            station_id = row[20:25].strip()
            synop = row[32:38].strip()
            longitude = to_dec_deg(row[47:55].strip())
            elevation = int(row[55:60].strip())
            nation = row[81:83].strip()
            entry = Station(id=station_id, synop_id=synop, name=title, state=region,
                            country=nation, longitude=longitude, latitude=latitude,
                            altitude=elevation, source=input_file)
            yield station_id, entry


def _read_airports_file(input_file=None):
    """Load the airports CSV and return its station columns as a dictionary.

    The ``ident``, ``latitude_deg``, ``longitude_deg``, ``elevation_ft`` and
    ``iso_region`` columns of the CSV are used. Elevation is converted from feet to
    meters, ``country`` is the text after the first ``-`` in ``iso_region``, and every
    row gets the constant synop ID 99999.

    Parameters
    ----------
    input_file : optional
        CSV to read, passed to `pandas.read_csv`. When omitted, ``airport-codes.csv`` as
        located by `get_test_data` is used.

    Returns
    -------
    dict
        Result of `pandas.DataFrame.to_dict` with its default orientation, i.e. keyed by
        column name (``'id'``, ``'synop_id'``, ...) with each value mapping row index to
        that column's value.
    """
    if input_file is None:
        input_file = get_test_data('airport-codes.csv', as_file_obj=False)
    airports = pd.read_csv(input_file)

    idents = airports.ident.values
    latitudes = airports.latitude_deg.values
    longitudes = airports.longitude_deg.values
    # Attach feet, convert to meters, then keep only the numeric magnitude.
    elevations_m = units.Quantity(airports.elevation_ft.values, 'ft').to('m').m
    # Split once on '-' and keep the second piece.
    regions = airports.iso_region.str.split('-', n=1, expand=True)[1].values

    columns = {
        'id': idents,
        'synop_id': 99999,
        'latitude': latitudes,
        'longitude': longitudes,
        'altitude': elevations_m,
        'country': regions,
        'source': input_file,
    }
    return pd.DataFrame(columns).to_dict()


@exporter.export
class StationLookup(Mapping):
    """Read-only mapping from station ID to station information from several tables.

    Because the class implements the `Mapping` protocol, the usual dictionary-style
    operations (iteration, ``len``, ``in``, indexing) are available with the station ID
    as the key. For example, to collect every identifier that begins with ``K``:

    >>> import metpy.io
    >>> conus_stations = [s for s in metpy.io.station_info if s.startswith('K')]
    >>> conus_stations[:3]
    ['KEET', 'K8A0', 'KALX']
    """

    @cached_property
    def tables(self):
        """Return the combined mapping over all tables, built on first access."""
        # Readers are called in this order, and the resulting ChainMap searches the
        # mappings in the same order, so earlier tables win for duplicate keys.
        # NOTE(review): _read_airports_file returns DataFrame.to_dict() output, so the
        # last mapping appears to be keyed by column name rather than station ID --
        # confirm whether that is intended.
        readers = (_read_station_table, _read_master_text_file,
                   _read_station_text_file, _read_airports_file)
        return ChainMap(*(dict(reader()) for reader in readers))

    def __len__(self):
        """Return how many distinct keys the combined tables hold."""
        combined = self.tables
        return len(combined)

    def __iter__(self):
        """Iterate over the keys of the combined tables."""
        combined = self.tables
        return iter(combined)

    def __getitem__(self, stid):
        """Return the entry stored for `stid`; raise `KeyError` when there is none."""
        try:
            entry = self.tables[stid]
        except KeyError:
            # Replace the bare key error with a descriptive message and hide the
            # internal lookup from the traceback.
            raise KeyError(f'No station information for {stid}') from None
        else:
            return entry


# Shared module-level lookup instance. Creating it reads no files: `StationLookup.tables`
# is a cached property, so the tables are parsed the first time they are accessed.
# It is defined inside the exporter context, presumably so the name is registered as
# public -- confirm against package_tools.Exporter.
with exporter:
    station_info = StationLookup()


@exporter.export
def add_station_lat_lon(df, stn_var=None):
    """Lookup station information to add the station latitude and longitude to the DataFrame.

    This function will add two columns to the DataFrame ('latitude' and 'longitude') after
    looking up all unique station identifiers available in the DataFrame. The columns are
    written to `df` itself (replacing any existing columns with those names), and
    stations that are not found in :data:`!station_info` are left as NaN.

    Parameters
    ----------
    df : `pandas.DataFrame`
        The DataFrame that contains the station observations
    stn_var : str, optional
        The string of the variable name that represents the station in the DataFrame. If not
        provided, 'station', 'stid', and 'station_id' are tried in that order.

    Returns
    -------
    `pandas.DataFrame` that contains original Dataframe now with the latitude and longitude
    values for each location found in :data:`!station_info`.

    Raises
    ------
    KeyError
        If `stn_var` is not given and none of the default column names is present, or if
        the requested column does not exist in `df`. In both cases `df` is left
        unmodified.
    """

    def key_finder(df):
        names_to_try = ('station', 'stid', 'station_id')
        for id_name in names_to_try:
            if id_name in df:
                return id_name
        raise KeyError('Second argument not provided to add_station_lat_lon, but none of '
                       f'{names_to_try} were found.')

    if stn_var is None:
        stn_var = key_finder(df)
    # Resolve the station column before touching `df`, so that a missing column raises
    # without leaving freshly added all-NaN latitude/longitude columns behind.
    station_ids = df[stn_var]

    # Start from NaN everywhere; only stations that are found get overwritten below.
    df['latitude'] = np.nan
    df['longitude'] = np.nan

    for stn in station_ids.unique():
        # Keep the try body to the lookup alone so unrelated errors are not swallowed.
        try:
            info = station_info[stn]
        except KeyError:
            continue
        rows = station_ids == stn
        df.loc[rows, 'latitude'] = info.latitude
        df.loc[rows, 'longitude'] = info.longitude
    return df