includes/class-linkbacks-mf2-handler.php
<?php
use Mf2\Parser;
/**
* provides a microformats handler for the semantic linkbacks
* WordPress plugin
*
* @author Matthias Pfefferle
*/
class Linkbacks_MF2_Handler {
/**
* initialize the plugin, registering WordPess hooks.
*/
public static function init() {
add_filter( 'semantic_linkbacks_commentdata', array( 'Linkbacks_MF2_Handler', 'generate_commentdata' ), 1 );
}
/**
* all supported url types
*
* @return array
*/
public static function get_class_mapper() {
$class_mapper = array();
/*
* replies
* @link http://indieweb.org/replies
*/
$class_mapper['in-reply-to'] = 'reply';
$class_mapper['reply'] = 'reply';
$class_mapper['reply-of'] = 'reply';
/*
* repost
* @link http://indieweb.org/repost
*/
$class_mapper['repost'] = 'repost';
$class_mapper['repost-of'] = 'repost';
/*
* likes
* @link http://indieweb.org/likes
*/
$class_mapper['like'] = 'like';
$class_mapper['like-of'] = 'like';
/*
* favorite
* @link http://indieweb.org/favorite
*/
$class_mapper['favorite'] = 'favorite';
$class_mapper['favorite-of'] = 'favorite';
/*
* bookmark
* @link http://indieweb.org/bookmark
*/
$class_mapper['bookmark'] = 'bookmark';
$class_mapper['bookmark-of'] = 'bookmark';
/*
* rsvp
* @link http://indieweb.org/rsvp
*/
$class_mapper['rsvp'] = 'rsvp';
/*
* invite
* @link https://indieweb.org/invitation
*/
$class_mapper['invitee'] = 'invite';
/*
* tag
* @link http://indieweb.org/tag
*/
$class_mapper['tag-of'] = 'tag';
$class_mapper['category'] = 'tag';
/*
* read
* @link http://indieweb.org/read
*/
$class_mapper['read-of'] = 'read';
$class_mapper['read'] = 'read';
/*
* listen
* @link http://indieweb.org/listen
*/
$class_mapper['listen-of'] = 'listen';
$class_mapper['listen'] = 'listen';
/*
* watch
* @link http://indieweb.org/watch
*/
$class_mapper['watch-of'] = 'watch';
$class_mapper['watch'] = 'watch';
/*
* follow
* @link http://indieweb.org/follow
*/
$class_mapper['follow-of'] = 'follow';
return apply_filters( 'semantic_linkbacks_microformats_class_mapper', $class_mapper );
}
/**
* all supported url types
*
* @return array
*/
public static function get_rel_mapper() {
$rel_mapper = array();
/*
* replies
* @link http://indieweb.org/in-reply-to
*/
$rel_mapper['in-reply-to'] = 'reply';
$rel_mapper['reply-of'] = 'reply';
return apply_filters( 'semantic_linkbacks_microformats_rel_mapper', $rel_mapper );
}
/**
* generate the comment data from the microformatted content
*
* @param WP_Comment $commentdata the comment object
*
* @return array
*/
public static function generate_commentdata( $commentdata ) {
// Use new webmention source meta key.
if ( array_key_exists( 'webmention_source_url', $commentdata['comment_meta'] ) ) {
$source = $commentdata['comment_meta']['webmention_source_url'];
} else { // Fallback to comment author url.
$source = $commentdata['comment_author_url'];
}
if ( ! class_exists( 'Mf2\Parser' ) ) {
require_once plugin_dir_path( __DIR__ ) . 'vendor/mf2/mf2/Mf2/Parser.php';
}
// parse source html
$parser = new Parser( $commentdata['remote_source_original'], $source );
$mf_array = $parser->parse( true );
// check for rel-alternate links
$alternate_source = self::get_alternate_source( $mf_array );
if ( $alternate_source ) {
$mf_array = $alternate_source;
}
// get all 'relevant' entries
$entries = self::get_entries( $mf_array );
if ( empty( $entries ) ) {
return $commentdata;
}
// get the entry of interest
$entry = self::get_representative_entry( $entries, $commentdata['target'] );
if ( empty( $entry ) ) {
return $commentdata;
}
$commentdata['remote_source_mf2'] = $entry;
$properties = array_filter( self::flatten_microformats( $entry ) );
$commentdata['remote_source_properties'] = $properties;
$rels = $mf_array['rels'];
$commentdata['remote_source_rels'] = $rels;
// try to find some content
// @link http://indieweb.org/comments-presentation
if ( isset( $properties['summary'] ) ) {
$commentdata['comment_content'] = wp_slash( $properties['summary'] );
} elseif ( isset( $properties['content'] ) ) {
$commentdata['comment_content'] = wp_filter_kses( $properties['content']['html'] );
} elseif ( isset( $properties['name'] ) ) {
$commentdata['comment_content'] = wp_slash( $properties['name'] );
}
$commentdata['comment_content'] = trim( $commentdata['comment_content'] );
// set the right date
if ( isset( $properties['published'] ) ) {
$commentdata['comment_date'] = self::convert_time( $properties['published'] );
} elseif ( isset( $properties['updated'] ) ) {
$commentdata['comment_date'] = self::convert_time( $properties['updated'] );
}
$commentdata['comment_date_gmt'] = get_gmt_from_date( $commentdata['comment_date'] );
$author = null;
// check if h-card has an author
if ( isset( $properties['author'] ) ) {
$author = $properties['author'];
} else {
$author = self::get_representative_author( $mf_array, $source );
$author = self::flatten_microformats( $author );
}
// If this is an invite than the invite should be displayed instead of the author
if ( isset( $properties['invitee'] ) ) {
$author = $properties['invitee'];
}
// if author is present use the informations for the comment
if ( $author ) {
if ( ! is_array( $author ) ) {
if ( self::is_url( $author ) ) {
$response = Linkbacks_Handler::retrieve( $author );
if ( ! is_wp_error( $response ) ) {
$parser = new Parser( wp_remote_retrieve_body( $response ), $author );
$author_array = $parser->parse( true );
$author = self::flatten_microformats( self::get_representative_author( $author_array, $author ) );
$properties['author'] = $author;
}
} else {
$commentdata['comment_author'] = wp_slash( $author );
}
}
if ( is_array( $author ) ) {
if ( ! isset( $author['me'] ) ) {
if ( isset( $mf_array['rels']['me'] ) ) {
$author['me'] = $mf_array['rels']['me'];
$properties['author']['me'] = $author['me'];
}
}
if ( isset( $author['name'] ) ) {
$commentdata['comment_author'] = wp_slash( self::first( $author['name'] ) );
}
if ( isset( $author['email'] ) ) {
$commentdata['comment_author_email'] = wp_slash( self::first( $author['email'] ) );
}
if ( isset( $author['url'] ) ) {
$commentdata['comment_meta']['semantic_linkbacks_author_url'] = self::first( $author['url'] );
}
if ( isset( $author['photo'] ) ) {
$commentdata['comment_meta']['avatar'] = self::first( $author['photo'] );
}
}
}
// set canonical url (u-url)
if ( isset( $properties['url'] ) ) {
$commentdata['comment_meta']['semantic_linkbacks_canonical'] = self::first( $properties['url'] );
// If the source URL and the canonical URL are not on the same domain, set this flag in the comment data to trigger actions in the Linkbacks_Handler class
if ( wp_parse_url( $properties['url'], PHP_URL_HOST ) !== wp_parse_url( $source, PHP_URL_HOST ) ) {
$commentdata['proxy_mention'] = 1;
}
} else {
$commentdata['comment_meta']['semantic_linkbacks_canonical'] = esc_url_raw( $source );
}
// If u-syndication is not set use rel syndication
if ( array_key_exists( 'syndication', $rels ) && ! array_key_exists( 'syndication', $properties ) ) {
$properties['syndication'] = $rels['syndication'];
}
// Check and parse for location property
if ( array_key_exists( 'location', $properties ) ) {
$location = $properties['location'];
if ( is_array( $location ) ) {
if ( array_key_exists( 'latitude', $location ) ) {
$commentdata['comment_meta']['geo_latitude'] = self::first( $location['latitude'] );
}
if ( array_key_exists( 'longitude', $location ) ) {
$commentdata['comment_meta']['geo_longitude'] = self::first( $location['longitude'] );
}
if ( array_key_exists( 'name', $location ) ) {
$commentdata['comment_meta']['geo_address'] = self::first( $location['name'] );
}
} else {
if ( substr( $location, 0, 4 ) === 'geo:' ) {
$geo = explode( ':', substr( urldecode( $location ), 4 ) );
$geo = explode( ';', $geo[0] );
$coords = explode( ',', $geo[0] );
$commentdata['comment_meta']['geo_latitude'] = trim( $coords[0] );
$commentdata['comment_meta']['geo_longitude'] = trim( $coords[1] );
} else {
$commentdata['comment_meta']['geo_address'] = $location;
}
}
}
// check rsvp property
if ( isset( $properties['rsvp'] ) ) {
$commentdata['comment_meta']['semantic_linkbacks_type'] = wp_slash( 'rsvp:' . self::first( $properties['rsvp'] ) );
} else {
// get post type
$commentdata['comment_meta']['semantic_linkbacks_type'] = wp_slash( self::get_entry_type( $commentdata['target'], $entry, $mf_array ) );
}
if ( isset( $properties['invitee'] ) ) {
$commentdata['comment_meta']['semantic_linkbacks_type'] = wp_slash( 'invite' );
}
// Check for person tagging
if ( isset( $properties['category'] ) ) {
if ( in_array( $commentdata['target'], $properties['category'], true ) ) {
$commentdata['category'] = array( $commentdata['target'] );
}
}
$whitelist = array(
'author',
'location',
'syndication',
'uid',
'video',
'audio',
'photo',
'featured',
'swarm-coins', // https://ownyourswarm.p3k.io/docs#coins
'read-status', // https://indieweb.org/read
);
// Add in supported properties
$whitelist = array_merge( $whitelist, array_keys( self::get_class_mapper() ) );
$whitelist = apply_filters( 'semantic_linkbacks_mf2_props_whitelist', $whitelist );
foreach ( $properties as $key => $value ) {
if ( in_array( $key, $whitelist, true ) ) {
if ( self::is_url( $value ) ) {
$value = esc_url_raw( $value );
}
if ( is_string( $value ) ) {
$value = wp_kses_post( $value );
}
$commentdata['comment_meta'][ 'mf2_' . $key ] = $value;
}
}
$commentdata['comment_meta'] = array_filter( $commentdata['comment_meta'] );
return $commentdata;
}
public static function convert_time( $time ) {
$time = strtotime( $time );
// If it can't read the time it will return null which will mean the comment time will be set to now.
if ( $time ) {
return get_date_from_gmt( gmdate( 'Y-m-d H:i:s', $time ), 'Y-m-d H:i:s' );
}
return null;
}
public static function get_property( $key, $properties ) {
if ( isset( $properties[ $key ] ) && isset( $properties[ $key ][0] ) ) {
if ( is_array( $properties[ $key ] ) ) {
$properties[ $key ] = array_unique( $properties[ $key ] );
}
if ( 1 === count( $properties[ $key ] ) ) {
return $properties[ $key ][0];
}
return $properties[ $key ];
}
return null;
}
/**
* Returns the first item in $val if it's a non-empty array, otherwise $val itself.
*/
public static function first( $val ) {
if ( $val && is_array( $val ) ) {
return $val[0];
}
return $val;
}
/**
* Is string a URL.
*
* @param array $string
* @return bool
*/
public static function is_url( $string ) {
if ( ! is_string( $string ) ) {
return false;
}
// If debugging is on just validate that URL is validly formatted
if ( WP_DEBUG ) {
return filter_var( $string, FILTER_VALIDATE_URL ) !== false;
}
// If debugging is off limit based on WordPress parameters
return wp_http_validate_url( $string );
}
// Accepted h types
public static function is_h( $string ) {
return in_array( $string, array( 'h-cite', 'h-entry', 'h-feed', 'h-product', 'h-event', 'h-review', 'h-recipe' ), true );
}
public static function flatten_microformats( $item ) {
$flat = array();
if ( 1 === count( $item ) ) {
$item = $item[0];
}
if ( array_key_exists( 'type', $item ) ) {
// If there are multiple types strip out everything but the standard one.
if ( 1 < count( $item['type'] ) ) {
$item['type'] = array_filter( $item['type'], array( 'Linkbacks_MF2_Handler', 'is_h' ) );
}
$flat['type'] = $item['type'][0];
}
if ( array_key_exists( 'properties', $item ) ) {
$properties = $item['properties'];
foreach ( $properties as $key => $value ) {
$flat[ $key ] = self::get_property( $key, $properties );
if ( ! is_string( $flat[ $key ] ) && 1 < count( $flat[ $key ] ) ) {
$flat[ $key ] = self::flatten_microformats( $flat[ $key ] );
}
}
} else {
$flat = $item;
}
foreach ( $flat as $key => $value ) {
// Sanitize all URL properties
if ( self::is_url( $value ) ) {
$flat[ $key ] = esc_url_raw( $value );
}
}
// If name and URL are the same, remove name.
if ( array_key_exists( 'name', $flat ) && array_key_exists( 'url', $flat ) ) {
if ( $flat['name'] === $flat['url'] ) {
unset( $flat['name'] );
}
}
// Duplicate url values for a property may be caused by implied urls https://github.com/indieweb/php-mf2/issues/110
if ( array_key_exists( 'url', $flat ) && is_array( $flat['url'] ) ) {
$flat['url'] = $flat['url'][0];
}
$flat = array_filter( $flat );
return $flat;
}
public static function has_alternate_url( $mf_array ) {
return (bool) self::get_alternate_url( $mf_array );
}
public static function get_alternate_url( $mf_array ) {
if ( ! array_key_exists( 'rel-urls', $mf_array ) ) {
return false;
}
foreach ( $mf_array['rel-urls'] as $url => $meta ) {
if (
isset( $meta['type'] ) &&
'application/mf2+json' === trim( $meta['type'] ) &&
isset( $meta['rels'] ) &&
in_array( 'alternate', $meta['rels'], true ) &&
filter_var( $url, FILTER_VALIDATE_URL ) !== false
) {
return $url;
}
}
return false;
}
public static function get_alternate_source( $mf_array ) {
$url = self::get_alternate_url( $mf_array );
if ( ! $url ) {
return false;
}
$user_agent = apply_filters( 'http_headers_useragent', 'WordPress/' . $wp_version . '; ' . get_bloginfo( 'url' ) );
$args = array(
'timeout' => 100,
'limit_response_size' => 153600,
'redirection' => 20,
'user-agent' => "$user_agent; Semantic-Linkbacks/Webmention read rel-alternate source",
);
$response = wp_safe_remote_get( $url, $args );
// check if source is accessible
if ( is_wp_error( $response ) ) {
return false;
}
$body = wp_remote_retrieve_body( $response );
return json_decode( $body, true );
}
/**
* get all h-entry items
*
* @param array $mf_array the microformats array
* @param array the h-entry array
*
* @return array
*/
public static function get_entries( $mf_array ) {
$entries = array();
// some basic checks
if ( ! is_array( $mf_array ) ) {
return $entries;
}
if ( ! isset( $mf_array['items'] ) ) {
return $entries;
}
if ( 0 === count( $mf_array['items'] ) ) {
return $entries;
}
// get first item
$first_item = $mf_array['items'][0];
// check if it is an h-feed
if ( isset( $first_item['type'] ) &&
in_array( 'h-feed', $first_item['type'], true ) &&
isset( $first_item['children'] ) ) {
$mf_array['items'] = $first_item['children'];
}
// iterate array
foreach ( $mf_array['items'] as $mf ) {
if ( isset( $mf['type'] ) && in_array( 'h-entry', $mf['type'], true ) ) {
$entries[] = $mf;
}
}
// return entries
return $entries;
}
/**
* helper to find the correct author node
*
* @param array $mf_array the parsed microformats array
* @param string $source the source url
*
* @return array|null the h-card node or null
*/
public static function get_representative_author( $mf_array, $source ) {
foreach ( $mf_array['items'] as $mf ) {
if ( isset( $mf['type'] ) ) {
if ( in_array( 'h-card', $mf['type'], true ) ) {
// check domain
if ( isset( $mf['properties'] ) && isset( $mf['properties']['url'] ) ) {
foreach ( $mf['properties']['url'] as $url ) {
if ( wp_parse_url( $url, PHP_URL_HOST ) === wp_parse_url( $source, PHP_URL_HOST ) ) {
if ( isset( $mf_array['rels']['me'] ) ) {
$mf['properties']['me'] = $mf_array['rels']['me'];
}
return $mf;
}
}
}
}
}
}
// If there is no h-card then return rel=author and see what can be done with it
if ( isset( $mf_array['rels']['author'] ) ) {
return $mf_array['rels']['author'];
}
return null;
}
/**
* helper to find the correct h-entry node
*
* @param array $mf_array the parsed microformats array
* @param string $target the target url
*
* @return array the h-entry node or false
*/
public static function get_representative_entry( $entries, $target ) {
// iterate array
foreach ( $entries as $entry ) {
// check properties
if ( isset( $entry['properties'] ) ) {
// check properties if target urls was mentioned
foreach ( $entry['properties'] as $key => $values ) {
// check "normal" links
if ( self::compare_urls( $target, $values ) ) {
return $entry;
}
// check included h-* formats and their links
foreach ( $values as $obj ) {
// check if reply is a "cite"
if ( isset( $obj['type'] ) && array_intersect( array( 'h-cite', 'h-entry' ), $obj['type'] ) ) {
// check url
if ( isset( $obj['properties'] ) && isset( $obj['properties']['url'] ) ) {
// check target
if ( self::compare_urls( $target, $obj['properties']['url'] ) ) {
return $entry;
}
}
}
}
}
// check properties if target urls was mentioned
foreach ( $entry['properties'] as $key => $values ) {
// check content for the link
if ( 'content' === $key &&
preg_match_all( '/<a[^>]+?' . preg_quote( $target, '/' ) . '[^>]*>([^>]+?)<\/a>/i', $values[0]['html'], $context ) ) {
return $entry;
} elseif ( 'summary' === $key &&
preg_match_all( '/<a[^>]+?' . preg_quote( $target, '/' ) . '[^>]*>([^>]+?)<\/a>/i', $values[0], $context ) ) {
return $entry;
}
}
}
}
// return first h-entry
return $entries[0];
}
/**
* check entry classes or document rels for post-type
*
* @param string $target the target url
* @param array $entry the represantative entry
* @param array $mf_array the document
*
* @return string the post-type
*/
public static function get_entry_type( $target, $entry, $mf_array = array() ) {
$classes = self::get_class_mapper();
// check properties for target-url
foreach ( $entry['properties'] as $key => $values ) {
// check u-* params
if ( in_array( $key, array_keys( $classes ), true ) ) {
// check "normal" links
if ( self::compare_urls( $target, $values ) ) {
return $classes[ $key ];
}
// iterate in-reply-tos
foreach ( $values as $obj ) {
// check if reply is a "cite" or "entry"
if ( isset( $obj['type'] ) && array_intersect( array( 'h-cite', 'h-entry' ), $obj['type'] ) ) {
// check url
if ( isset( $obj['properties'] ) && isset( $obj['properties']['url'] ) ) {
// check target
if ( self::compare_urls( $target, $obj['properties']['url'] ) ) {
return $classes[ $key ];
}
}
}
}
}
}
// check if site has any rels
if ( ! isset( $mf_array['rels'] ) ) {
return 'mention';
}
$rels = self::get_rel_mapper();
// check rels for target-url
foreach ( $mf_array['rels'] as $key => $values ) {
// check rel params
if ( in_array( $key, array_keys( $rels ), true ) ) {
foreach ( $values as $value ) {
if ( $value === $target ) {
return $rels[ $key ];
}
}
}
}
return 'mention';
}
/**
* compare an url with a list of urls
*
* @param string $needle the target url
* @param array $haystack a list of urls
* @param boolean $schemelesse define if the target url should be checked with http:// and https://
*
* @return boolean
*/
public static function compare_urls( $needle, $haystack, $schemeless = true ) {
if ( ! self::is_url( $needle ) ) {
return false;
}
if ( is_array( reset( $haystack ) ) ) {
return false;
}
if ( true === $schemeless ) {
// remove url-scheme
$schemeless_target = preg_replace( '/^https?:\/\//i', '', $needle );
// add both urls to the needle
$needle = array( 'http://' . $schemeless_target, 'https://' . $schemeless_target );
} else {
// make $needle an array
$needle = array( $needle );
}
// compare both arrays
return array_intersect( $needle, $haystack );
}
}