#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author:   Michael E. Rose <>
"""Super class to represent a scientist."""

from warnings import warn

from pybliometrics.scopus import AbstractRetrieval, AffiliationRetrieval
from pybliometrics.scopus.exception import Scopus404Error

from sosia.establishing import connect_database, DEFAULT_DATABASE
from sosia.processing import add_source_names, base_query, count_citations,\
    extract_authors, find_main_affiliation, get_authors, get_main_field,\
    maybe_add_source_names, read_fields_sources_list
from sosia.utils import accepts

class Scientist(object):
    """Container for the characteristics of one scientist up to a given year.

    All characteristics are computed once in `__init__` and exposed through
    read/write properties backed by underscore-prefixed attributes.
    """

    @property
    def active_year(self):
        """The scientist's most recent year with publication(s) before
         provided year (which may be the same).
        """
        return self._active_year

    @active_year.setter
    def active_year(self, val):
        self._active_year = val

    @property
    def affiliation_country(self):
        """The current country of the affiliation defined in affiliation_id."""
        return self._affiliation_country

    @affiliation_country.setter
    def affiliation_country(self, val):
        self._affiliation_country = val

    @property
    def affiliation_id(self):
        """The affiliation ID (as string) of the scientist's most frequent
        affiliation in or before the active year.
        """
        return self._affiliation_id

    @affiliation_id.setter
    def affiliation_id(self, val):
        self._affiliation_id = val

    @property
    def affiliation_name(self):
        """The current name of the affiliation defined in affiliation_id."""
        return self._affiliation_name

    @affiliation_name.setter
    def affiliation_name(self, val):
        self._affiliation_name = val

    @property
    def affiliation_type(self):
        """The current type of the affiliation defined in affiliation_id."""
        return self._affiliation_type

    @affiliation_type.setter
    def affiliation_type(self, val):
        # Write to the backing attribute; assigning to the property itself
        # would re-enter this setter and recurse forever.
        self._affiliation_type = val

    @property
    def citations(self):
        """The citations of the scientist until the provided year."""
        return self._citations

    @citations.setter
    def citations(self, val):
        self._citations = val

    @property
    def citations_period(self):
        """The citations of the scientist during the given period."""
        return self._citations_period

    @citations_period.setter
    def citations_period(self, val):
        self._citations_period = val

    @property
    def coauthors(self):
        """Set of coauthors of the scientist on all publications until the
        provided year.
        """
        return self._coauthors

    @coauthors.setter
    @accepts((set, list, tuple))
    def coauthors(self, val):
        self._coauthors = val

    @property
    def coauthors_period(self):
        """Set of coauthors of the scientist on all publications during the
        given period.
        """
        return self._coauthors_period

    @coauthors_period.setter
    @accepts((set, list, tuple))
    def coauthors_period(self, val):
        self._coauthors_period = val

    @property
    def fields(self):
        """The fields of the scientist until the provided year, estimated from
        the sources (journals, books, etc.) she published in.
        """
        return self._fields

    @fields.setter
    @accepts((set, list, tuple))
    def fields(self, val):
        self._fields = val

    @property
    def first_year(self):
        """The scientist's year of first publication."""
        return self._first_year

    @first_year.setter
    def first_year(self, val):
        self._first_year = val

    @property
    def first_name(self):
        """The scientist's first name."""
        return self._first_name

    @first_name.setter
    def first_name(self, val):
        # Must update the attribute the getter reads, not the full name.
        self._first_name = val

    @property
    def main_field(self):
        """The scientist's main field of research, as tuple in
        the form (ASJC code, general category).

        The main field is the field with the most publications, provided it
        is not Multidisciplinary (ASJC code 1000).  In case of an equal number
        of publications, preference is given to non-general fields (those
        whose ASJC ends on a digit other than 0).
        """
        return self._main_field

    @main_field.setter
    def main_field(self, val):
        if not isinstance(val, tuple) or len(val) != 2:
            raise Exception("Value must be a two-element tuple.")
        self._main_field = val

    @property
    def name(self):
        """The scientist's complete name."""
        return self._name

    @name.setter
    def name(self, val):
        self._name = val

    @property
    def language(self):
        """The language(s) of the scientist published in."""
        return self._language

    @language.setter
    def language(self, val):
        self._language = val

    @property
    def publications(self):
        """List of the scientists' publications."""
        return self._publications

    @publications.setter
    @accepts((set, list, tuple))
    def publications(self, val):
        self._publications = val

    @property
    def publications_period(self):
        """The publications of the scientist published during
        the given period.
        """
        return self._publications_period

    @publications_period.setter
    @accepts((set, list, tuple))
    def publications_period(self, val):
        self._publications_period = val

    @property
    def sources(self):
        """The Scopus IDs of sources (journals, books) in which the
        scientist published in.
        """
        return self._sources

    @sources.setter
    @accepts((list, tuple))
    def sources(self, val):
        self._sources = maybe_add_source_names(val, self.source_names)

    @property
    def surname(self):
        """The scientist's surname."""
        return self._surname

    @surname.setter
    def surname(self, val):
        # Must update the attribute the getter reads, not the full name.
        self._surname = val

    @property
    def subjects(self):
        """The subject areas of the scientist's publications."""
        return self._subjects

    @subjects.setter
    @accepts((set, list, tuple))
    def subjects(self, val):
        self._subjects = val

    def __init__(self, identifier, year, refresh=False, period=None, eids=None,
                 sql_fname=None):
        """Class to represent a scientist.

        Parameters
        ----------
        identifier : list of int
            List of Scopus Author IDs of the scientist.

        year : str or numeric
            Year for which characteristics should be defined for.

        refresh : boolean or int (optional, default=False)
            Whether to refresh cached results (if they exist) or not. If int
            is passed, results will be refreshed if they are older than
            that value in number of days.

        eids : list (optional, default=None)
            A list of scopus EIDs of the publications of the scientist.  If
            it is provided, the scientist's properties are set based on these
            publications, instead of the list of publications obtained from
            the Scopus Author ID(s).

        period: int (optional, default=None)
            If given, publications, citations and coauthors are additionally
            computed for the window of `period` years ending in `year`.
            The window is ignored (period attributes are None) when it
            would start before the scientist's first publication year.

        sql_fname : str (optional or pathlib.Path(), default=None)
            The path of the SQLite database to connect to.  If None will
            default to `~/.cache/sosia/main.sqlite`.

        Raises
        ------
        Exception
            When there are no publications for the author until the
            provided year.
        """
        self.identifier = identifier
        self.year = int(year)
        if not sql_fname:
            sql_fname = DEFAULT_DATABASE
        self.sql_conn = connect_database(sql_fname)

        # Read mapping of fields to sources
        fields, info = read_fields_sources_list()
        self.field_source = fields
        self.source_info = info
        source_names = self.source_info.set_index("source_id")["title"].to_dict()
        self.source_names = source_names

        # Load list of publications: explicit EIDs take precedence over the
        # author-ID search
        if eids:
            q = f"EID({' OR '.join(eids)})"
        else:
            q = f"AU-ID({') OR AU-ID('.join([str(i) for i in identifier])})"
        integrity_fields = ["eid", "author_ids", "coverDate", "source_id"]
        res = base_query("docs", q, refresh, fields=integrity_fields)
        # Compare against the int-coerced year: `year` itself may be a string
        self._publications = [p for p in res
                              if int(p.coverDate[:4]) <= self.year]
        author_label = "-".join([str(i) for i in identifier])
        if not self._publications:
            text = "No publications found for author "\
                   f"{author_label} until {self.year}"
            raise Exception(text)
        self._eids = eids or [p.eid for p in self._publications]

        # First year of publication
        pub_years = [p.coverDate[:4] for p in self._publications]
        self._first_year = int(min(pub_years))
        # Start year of the period; evaluates to 0 when no period is given
        self._period_year = self.year - (period or (self.year+1)) + 1
        if self._period_year < self._first_year:
            self._period_year = 0

        # Count of citations
        search_ids = eids or identifier
        self._citations = count_citations(search_ids, self.year+1, identifier)

        # Coauthors
        self._coauthors = set(extract_authors(self._publications)) - set(identifier)

        # Period characteristics; all None when there is no usable period
        if self._period_year:
            pubs = [p for p in self._publications if
                    self._period_year <= int(p.coverDate[:4]) <= self.year]
            self._publications_period = pubs
            if not self._publications_period:
                text = "No publications found for author "\
                       f"{author_label} until "\
                       f"{self.year} in a {self._period_year}-years period"
                raise Exception(text)
            eids_period = [p.eid for p in self._publications_period]
            n_cits = count_citations(eids_period, self.year+1, identifier)
            self._citations_period = n_cits
            self._coauthors_period = set(extract_authors(self._publications_period))
            self._coauthors_period -= set(identifier)
        else:
            self._coauthors_period = None
            self._publications_period = None
            self._citations_period = None

        # Author search information
        source_ids = {int(p.source_id) for p in self._publications
                      if p.source_id}
        self._sources = add_source_names(source_ids, self.source_names)
        self._active_year = int(max(pub_years))
        mask = fields["source_id"].isin(source_ids)
        self._fields = fields[mask]["asjc"].astype(int).tolist()
        self._main_field = get_main_field(self._fields)
        if not self._main_field[0]:
            text = "Not possible to determine research field(s) of "\
                   "researcher.  Functionality is reduced."
            warn(text, UserWarning)

        # Most recent geolocation
        afid = find_main_affiliation(identifier, self._publications, self.year)
        self._affiliation_id = afid
        try:
            aff = AffiliationRetrieval(afid, refresh=refresh)
            self._affiliation_country = aff.country
            self._affiliation_name = aff.affiliation_name
            self._affiliation_type = aff.org_type
        except (Scopus404Error, ValueError):
            # Affiliation unknown or not retrievable: leave details empty
            self._affiliation_country = None
            self._affiliation_name = None
            self._affiliation_type = None
        self._language = None

        # Author name from profile with most documents
        df = get_authors(self.identifier, self.sql_conn,
                         refresh=refresh, verbose=False)
        au = df.sort_values("documents", ascending=False).iloc[0]
        self._subjects = [a.split(" ")[0] for a in au.areas.split("; ")]
        self._surname = au.surname or None
        self._first_name = au.givenname or None
        name = ", ".join([self._surname or "", au.givenname or ""])
        if name == ", ":
            name = None
        self._name = name

    def get_publication_languages(self, refresh=False):
        """Parse languages of published documents.

        Retrieves each publication in `self._eids`, collects the distinct
        non-empty languages and stores them, sorted and joined by "; ",
        in the `language` property.  Documents missing on Scopus are
        skipped.

        Parameters
        ----------
        refresh : boolean or int (optional, default=False)
            Passed on to AbstractRetrieval to control cache refreshing.

        Returns
        -------
        self : Scientist
            The instance itself, to allow method chaining.
        """
        langs = set()
        for eid in self._eids:
            try:
                ab = AbstractRetrieval(eid, view="FULL", refresh=refresh)
            except Scopus404Error:
                continue
            langs.add(ab.language)
        self._language = "; ".join(sorted(filter(None, langs)))
        return self