1 wk
Test Coverage
An implementation of draft-lajoie-md-query

.. code-block:: bash

    Usage: pyffd <options> {pipeline-files}+

            Turn off caching
    -p <pidfile>
            Write a pidfile at the specified location
            Run in foreground
            Restart pyffd if any of the pipeline files change
    --log=<log> | -l<log>
            Set to either a file or syslog:<facility> (eg syslog:auth)
    --error-log=<log> | --access-log=<log>
            As --log but only affects the error or access log streams.
            Set logging level
            Listen on the specified port
            Listen on the specified interface
            Wake up every <seconds> and run the update pipeline. By
            default the frequency is set to 600.
            Add the mapping 'name: uri' to the toplevel URL alias
            table. This causes URLs on the form http://server/<name>/x
            to be processed as http://server/metadata/{uri}x. The
            default alias table is presented at http://server
            Chdir into <dir> after the server starts up.
            The service is running behind a proxy - respect the X-Forwarded-Host header.
    -m <module>|--modules=<module>
            Load a module

            One or more pipeline files


import importlib
import pkg_resources
import traceback
from six.moves.urllib_parse import urlparse, quote_plus
import os
import sys
from threading import Lock
import cherrypy
from cherrypy._cpdispatch import Dispatcher
from cherrypy._cperror import NotFound, HTTPError
from cherrypy.lib import cptools
from cherrypy.process.plugins import Monitor, SimplePlugin
from cherrypy.lib import caching
from simplejson import dumps
from .constants import config, parse_options
from .locks import ReadWriteLock
from .pipes import plumbing
from .utils import resource_string, duration2timedelta, debug_observer, render_template, hash_id, safe_b64e, safe_b64d
from .logs import get_log, SysLogLibHandler
from .samlmd import entity_simple_summary, entity_display_name, entity_info
from .repo import MDRepository
import logging
from datetime import datetime
from publicsuffix2 import get_public_suffix
from .i18n import language
from . import samlmd
import six

if six.PY2:
    from cgi import escape
    _ = language.ugettext
    from html import escape
    _ = language.gettext

log = get_log(__name__)
site_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "site")

class MDUpdate(Monitor):
    def __init__(self, bus, frequency=int(config.update_frequency), server=None):
        self.lock = Lock()
        self.server = server
        self.bus = bus
        Monitor.__init__(self, bus, lambda:, frequency=frequency)
        self.nruns = 0

    def run(self, server):
        locked = False
            locked = True

            for p in server.plumbings:
                state = {'update': True}
                p.process(, state=state)

            self.server.ready = True
        except Exception as ex:
            if locked:

    def start(self):
        super(MDUpdate, self).start()

    def stop(self):
        super(MDUpdate, self).stop()

    start.priority = 80

class DirPlugin(SimplePlugin):
    def __init__(self, bus, d=None):
        SimplePlugin.__init__(self, bus)
        self.dir = d

    def start(self):

    start.priority = 79

class EncodingDispatcher(object):
    """Cherrypy ass-u-me-s a lot about how requests are processed. In particular it is diffucult to send
    something that contains '/' and ':' (like a URL) using the standard dispatchers. This class provides
    a workaround by base64-encoding the troubling stuff and sending the result through the normal displatch
    pipeline. At the other end base64-encoded data is unpacked.

    def __init__(self, prefixes, enc, next_dispatcher=Dispatcher()):
        self.prefixes = prefixes
        self.enc = enc
        self.next_dispatcher = next_dispatcher

    def dispatch(self, path_info):
        # log.debug("EncodingDispatcher (%s) called with %s" % (",".join(self.prefixes), path_info))
        # vpath = path_info.replace("%2F", "/")
        vpath = path_info
        for prefix in self.prefixes:
            if vpath.startswith(prefix):
                # log.debug("EncodingDispatcher (%s) called with %s" % (",".join(self.prefixes), path_info))
                vpath = path_info.replace("%2F", "/")
                plen = len(prefix)
                vpath = vpath[plen + 1:]
                npath = "%s/%s" % (prefix, self.enc(vpath))
                # log.debug("EncodingDispatcher %s" % npath.encode('ascii', errors='ignore'))
                if six.PY2:
                    npath = npath.encode('ascii', errors='ignore')
                return self.next_dispatcher(npath)
        return self.next_dispatcher(vpath)

class WellKnown(object):
    """Implementation of the .well-known URL namespace for pyFF. In particular this contains the webfinger
    implementation which returns information about up- and downstream metadata.

    def __init__(self, server=None):
        self.server = server

    def webfinger(self, resource=None, rel=None):
        """An implementation the webfinger protocol (
        in order to provide information about up and downstream metadata available at this pyFF instance.


.. code-block:: bash

        # curl http://localhost:8080/.well-known/webfinger?resource=http://localhost:8080

This should result in a JSON structure that looks something like this:

.. code-block:: json

        {"expires": "2013-04-13T17:40:42.188549",
         "links": [
            {"href": "", "rel": "urn:oasis:names:tc:SAML:2.0:metadata"},
            {"href": "", "rel": "disco-json"}],
         "subject": ""}

Depending on which version of pyFF your're running and the configuration you may also see downstream metadata
listed using the 'role' attribute to the link elements.
        if resource is None:
            resource = cherrypy.request.base

        jrd = dict()
        dt = + duration2timedelta("PT1H")
        jrd['expires'] = dt.isoformat()
        jrd['subject'] = cherrypy.request.base
        links = list()
        jrd['links'] = links

        _dflt_rels = {
            'urn:oasis:names:tc:SAML:2.0:metadata': '.xml',
            'disco-json': '.json'

        if rel is None:
            rel = list(_dflt_rels.keys())
            rel = [rel]

        def _links(url):
            if url.startswith('/'):
                url = url.lstrip('/')
            for r in rel:
                suffix = ""
                if not url.endswith('/'):
                    suffix = _dflt_rels[r]
                                  href='%s/%s%s' % (cherrypy.request.base, url, suffix)))

        for a in
            if a is not None and '://' not in a:

        for entity_id in
            _links("/metadata/%s" % hash_id(entity_id))

        for a in list(self.server.aliases.keys()):
            for v in[a]):
                _links('%s/%s' % (a, quote_plus(v)))

        cherrypy.response.headers['Content-Type'] = 'application/json'
        cherrypy.response.headers['Access-Control-Allow-Origin'] = '*'
        return dumps(jrd)

class NotImplementedFunction(object):
    def __init__(self, message):
        self.message = message

    def index(self):
        return self.message

class SHIBDiscovery(object):
    An endpoint designed to provide backwards compatibility with standard shibboleth-based discovery services.

    def __init__(self, server=None):
        self.server = server

    def DS(self, *args, **kwargs):
        kwargs['path'] = "/role/idp.ds"
        kwargs['request_type'] = 'discovery'
        return self.server.request(**kwargs)

    def WAYF(self, *args, **kwargs):
        raise HTTPError(400, _("400 Bad Request - shibboleth WAYF protocol not supported"))

    def default(self, *args, **kwargs):
        log.debug("default args: %s, kwargs: %s" % (repr(args), repr(kwargs)))
        if len(args) > 0 and args[0] in self.server.aliases:
            kwargs['pfx'] = args[0]
            if len(args) > 1:
                kwargs['path'] = args[1]
            return self.server.request(**kwargs)
            kwargs['pfx'] = None
            kwargs['path'] = "/" + "/".join(args)
            return self.server.request(**kwargs)

class MDRoot(object):
    """The root application of pyFF. The root application assembles the MDStats and WellKnown classes with an
    MDServer instance.

    def __init__(self, server):
        self.server = server
        self._well_known.server = server
        self.discovery.server = server

    discovery = SHIBDiscovery()

    _well_known = WellKnown()
    static ="/static", os.path.join(site_dir, "static"))

    def status(self):
        status = "loading"
        if self.server.ready:
            status = "running"
        version = pkg_resources.require("pyFF")[0].version
        cherrypy.response.headers['Content-Type'] = 'application/json'
        cherrypy.response.headers['Access-Control-Allow-Origin'] = '*'
        return dumps({'status': status, 'version': version})

    def shutdown(self):
        cfg =['global']
        if 'allow_shutdown' in cfg and bool(cfg.get('allow_shutdown')):
            from threading import Timer
            Timer(3, cherrypy.engine.exit, ()).start()
            return "bye ..."
        else:  # pragma: nocover
            raise cherrypy.HTTPError(403, _("Endpoint disabled in configuration"))

    @cherrypy.expose, debug=True)
    def robots_txt(self):
        """Returns a robots.txt that disables all robots.
        return """
User-agent: *
Disallow: /

    @cherrypy.expose(), debug=True)
    def storage(self):
        The crossdomain storage hub iframe

        entity_id = cherrypy.request.params.get('entity_id')
        return render_template("storage.html")

    def favicon_ico(self):
        """Returns the pyff icon (the alchemic symbol for sublimation).
        cherrypy.response.headers['Content-Type'] = 'image/x-icon'
        return resource_string('favicon.ico', "site/static/icons")

    def entities(self, path=None):
        """Process an MDX request with Content-Type hard-coded to application/xml. Regardless of the suffix
        you will get XML back from /entities/...
        return self.server.request(path=path, request_type='mdq', content_type="application/xml")

    def metadata(self, path=None):
        """The main request entry point. Any requests are subject to content negotiation based on Accept headers
        and based on file name extension. Requesting /metadata/foo.xml gets you (signed) XML (assuming your pipeline
        contains that mode), requesting /metadata/foo.json gets you json, and /metadata/foo.ds gets you a discovery
        interface based on the IdPs found in 'foo'. Here 'foo' is any supported lookup expression.
        return self.server.request(path=path)

    def about(self):
        """The 'about' page. Contains links to statistics etc.
        import pkg_resources  # part of setuptools

        version = pkg_resources.require("pyFF")[0].version
        return render_template("about.html",
                               sysinfo=" ".join(os.uname()),
                               cmdline=" ".join(sys.argv),

    def reset(self):
        """The /reset page clears all local browser settings for the device. After visiting
        this page users of the discovery service will see a "new device" page.
        return render_template("reset.html")

    def settings(self):
        """The /settings page documents the (non) use of cookies.
        return render_template("settings.html")

    def search(self, query=None, entity_filter=None, related=None):
Search the active set for matching entities.
        :param query: the string query
        :param entity_filter: an optional filter to apply to the active set before searching
        :param related: an optional '+'-separated list of related domain names for prioritizing search results
        :return: a JSON-formatted search result
        cherrypy.response.headers['Content-Type'] = 'application/json'
        cherrypy.response.headers['Access-Control-Allow-Origin'] = '*'
        return dumps(,

    def index(self):
        """Alias for /metadata
        return self.server.request()

    def default(self, *args, **kwargs):
        """The default request processor unpacks base64-encoded reuqests and passes them onto the MDServer.request
        # log.debug("ROOT default args: %s, kwargs: %s" % (repr(args), repr(kwargs)))
        if len(args) > 0 and args[0] in self.server.aliases:
            kwargs['pfx'] = args[0]
            if len(args) > 1:
                kwargs['path'] = args[1]
            return self.server.request(**kwargs)
            kwargs['pfx'] = None
            kwargs['path'] = "/" + "/".join(args)
            return self.server.request(**kwargs)

class MDServer(object):
    """The MDServer class is the business logic of pyFF. This class is isolated from the request-decoding logic
    of MDRoot and from the ancilliary classes like MDStats and WellKnown.

    def __init__(self, pipes=None, observers=None):

        if not observers:
            observers = []
        if not pipes:
            pipes = []
        self._pipes = pipes
        self.lock = ReadWriteLock()
        self.plumbings = [plumbing(v) for v in pipes]
        self.refresh = MDUpdate(cherrypy.engine, server=self, frequency=config.update_frequency)
        self.aliases = config.aliases = MDRepository()
        self.ready = False

        if config.autoreload:
            for f in pipes:

    def reload_pipeline(self):
        new_plumbings = [plumbing(v) for v in self._pipes]
        self.plumbings = new_plumbings

    class MediaAccept(object):

        def __init__(self):

        def has_key(self, key):
            return True

        def get(self, item):
            return self.__getitem__(item)

        def __getitem__(self, item):
                return cptools.accept(item, debug=True)
            except HTTPError:
                return False

    def request(self, **kwargs):
        """The main request processor. This code implements all rendering of metadata.

        if not self.ready:
            raise HTTPError(503, _("Service Unavailable (repository loading)"))

        pfx = kwargs.get('pfx', None)
        path = kwargs.get('path', None)
        content_type = kwargs.get('content_type', None)
        request_type = kwargs.get('request_type', "negotiate")

        log.debug("MDServer pfx=%s, path=%s, content_type=%s" % (pfx, path, content_type))

        def _d(x, do_split=True):
            dot = six.u('.')
            if x is not None:
                x = x.strip()
            # log.debug("_d(%s,%s)" % (x, do_split))
            if x is None or len(x) == 0:
                return None, None

            if x.startswith("{base64}"):
                x = safe_b64d(x[8:])
                if isinstance(x, six.binary_type):
                    x = x.decode()

            if do_split and dot in x:
                (pth, _, extn) = x.rpartition(dot)
                if extn in _ctypes:
                    return pth, extn

            return x, None

        _ctypes = {'xml': 'application/xml',
                   'json': 'application/json',
                   'htm': 'text/html',
                   'html': 'text/html',
                   'ds': 'text/html',
                   's': 'application/json'}

        alias = None
        if pfx:
            alias = pfx
            pfx = self.aliases.get(alias, None)
            if pfx is None:
                raise NotFound()

        path, ext = _d(path, content_type is None)
        if pfx and path:
            q = "{%s}%s" % (pfx, path)
            path = "/%s/%s" % (alias, path)
            q = path

        if ext is not None:
            log.debug("request path: %s.%s, headers: %s" % (path, ext, cherrypy.request.headers))
            log.debug("request path: %s, headers: %s" % (path, cherrypy.request.headers))

        accept = {}
        if content_type is None:
            if ext is not None and ext in _ctypes:
                accept = {_ctypes[ext]: True}
                accept = MDServer.MediaAccept()
                if ext is not None:
                    path = "%s.%s" % (path, ext)
            accept = {content_type: True}

        with self.lock.readlock:
            if ext == 'ds':
                pdict = dict()
                entity_id = kwargs.get('entityID', None)
                if entity_id is None:
                    raise HTTPError(400, _("400 Bad Request - missing entityID"))

                e =
                if e is None or len(e) == 0:
                    raise HTTPError(404)

                if len(e) > 1:
                    raise HTTPError(400, _("Bad Request - multiple matches for") + " %s" % entity_id)

                pdict['entity'] = entity_simple_summary(e[0])
                if not path:
                    pdict['search'] = "/search/"
                    pdict['list'] = "/role/idp.json"
                    pdict['search'] = "{}.s".format(escape(path, quote=True))
                    pdict['list'] = "{}.json".format(escape(path, quote=True))

                pdict['storage'] = "/storage/"
                cherrypy.response.headers['Content-Type'] = 'text/html'
                return render_template(config.ds_template, **pdict)
            elif ext == 's':
                query = kwargs.get('query', None)
                entity_filter = kwargs.get('entity_filter', None)
                related = kwargs.get('related', None)

                cherrypy.response.headers['Content-Type'] = 'application/json'
                cherrypy.response.headers['Access-Control-Allow-Origin'] = '*'

                if query is None:
                    log.debug("empty query - creating one")
                    query = [cherrypy.request.remote.ip]
                    referrer = cherrypy.request.headers.get('referrer', None)
                    if referrer is not None:
                        log.debug("including referrer: %s" % referrer)
                        url = urlparse(referrer)
                        host = url.netloc
                        if ':' in url.netloc:
                            (host, port) = url.netloc.split(':')
                        for host_part in host.rstrip(get_public_suffix(host)).split('.'):
                            if host_part is not None and len(host_part) > 0:
                    log.debug("created query: %s" % ",".join(query))

                return dumps(,
            elif accept.get('text/html'):
                if not q:
                    if pfx:
                        title = pfx
                        title = _("Metadata By Attributes")
                    return render_template("index.html",
                    entities =
                    if not entities:
                        raise NotFound()
                    if len(entities) > 1:
                        return render_template("metadata.html",
                        entity = entities[0]
                        return render_template("entity.html",
                for p in self.plumbings:
                    state = {'request': request_type,
                             'headers': {'Content-Type': 'text/xml'},
                             'accept': accept,
                             'url': cherrypy.url(relative=False),
                             'select': q,
                             'path': path,
                             'stats': {}}
                    r = p.process(, state=state)
                    if r is not None:
                        cache_ttl = state.get('cache', 0)
                        log.debug("caching for %d seconds" % cache_ttl)
                        for k, v in list(state.get('headers', {}).items()):
                            cherrypy.response.headers[k] = v
                        cherrypy.response.headers['Access-Control-Allow-Origin'] = '*'
                        return r
        raise NotFound()

def main():
    The main entrypoint for the pyffd command.
    args = parse_options("pyffd",
                         ['help', 'loglevel=', 'log=', 'access-log=', 'error-log=',
                          'port=', 'host=', 'no-caching', 'autoreload', 'frequency=', 'module=',
                          'alias=', 'dir=', 'version', 'proxy', 'allow_shutdown'])

    engine = cherrypy.engine
    plugins = cherrypy.process.plugins

    if config.daemonize:
        cherrypy.config.update({'environment': 'production'})
        cherrypy.config.update({'log.screen': False})
        if config.error_log is None:
            config.error_log = 'syslog:daemon'
        if config.access_log is None:
            config.access_log = 'syslog:daemon'

    if config.base_dir is not None:
        DirPlugin(engine, config.base_dir).subscribe()

    if config.pid_file:
        plugins.PIDFile(engine, config.pid_file).subscribe()

    def _b64(p):
        if p:
            return "{base64}%s" % safe_b64e(p)
            return ""

    def error_page(code, **kwargs):
        return render_template("%d.html" % code, **kwargs)

    observers = []

    if config.loglevel == logging.DEBUG:

    for mn in config.modules:

    server = MDServer(pipes=args, observers=observers)

    pfx = ["/entities", "/metadata"] + ["/" + x for x in list(server.aliases.keys())]
    cfg = {
        'global': {
            'tools.encode.on': True,
            'tools.encode.text_only': False,
            'tools.encode.encoding': 'UTF-8',
            'server.socket_port': config.port,
            'server.socket_host': config.bind_address,
            'tools.caching.on': config.caching_enabled,
            'tools.caching.debug': config.caching_enabled,
            'tools.trailing_slash.on': True,
            'tools.caching.maxobj_size': 1000000000000,  # effectively infinite
            'tools.caching.maxsize': 1000000000000,
            'tools.caching.antistampede_timeout': 30,
            'tools.caching.delay': 3600,  # this is how long we keep static stuff
            'checker.on': False,
            'log.screen': True,
            'tools.proxy.on': config.proxy,
            'allow_shutdown': config.allow_shutdown,
            'error_page.404': lambda **kwargs: error_page(404, _=_, **kwargs),
            'error_page.503': lambda **kwargs: error_page(503, _=_, **kwargs),
            'error_page.500': lambda **kwargs: error_page(500, _=_, **kwargs),
            'error_page.400': lambda **kwargs: error_page(400, _=_, **kwargs)
        '/': {
            'tools.encode.on': True,
            'tools.encode.encoding': 'UTF-8',
            'tools.caching.delay': config.caching_delay,
            'tools.proxy.on': config.proxy,
            'request.dispatch': EncodingDispatcher(pfx, _b64).dispatch,
            'request.dispatpch.debug': True,
        '/static': {
            'tools.caching.on': config.caching_enabled,
            'tools.caching.delay': config.caching_delay,
            'tools.proxy.on': config.proxy
        '/shutdown': {
            'allow_shutdown': config.allow_shutdown

    if config.error_log is not None:
        cherrypy.config.update({'log.screen': False})

    root = MDRoot(server)
    app = cherrypy.tree.mount(root, config=cfg)
    log_args = {'level': config.loglevel}
    if config.error_log is not None:
        log_args['filename'] = config.error_log

    if config.error_log is not None:
        if config.error_log.startswith('syslog:'):
            facility = config.error_log[7:]
            h = SysLogLibHandler(facility=facility)
            cherrypy.config.update({'log.error_file': ''})
            cherrypy.config.update({'log.error_file': config.error_log})

    if config.access_log is not None:
        if config.access_log.startswith('syslog:'):
            facility = config.access_log[7:]
            h = SysLogLibHandler(facility=facility)
            cherrypy.config.update({'log.access_file': ''})
            cherrypy.config.update({'log.access_file': config.access_log})

    except Exception as ex:

if __name__ == "__main__":