wikimedia/mediawiki-extensions-DonationInterface

View on GitHub
gateway_common/DataValidator.php

Summary

Maintainability
D
2 days
Test Coverage
<?php

use SmashPig\Core\ValidationError;

/**
 * DataValidator
 * This class is responsible for performing all kinds of data validation,
 * wherever we may need it.
 *
 * All functions should be static, so we don't have to construct anything in
 * order to use it any/everywhere.
 *
 * @author khorn
 * @author awight
 */
class DataValidator {

    /**
     * getErrorToken, intended to be used by classes that exist relatively close
     * to the form classes, returns the error token (defined on the forms) that
     * specifies *where* the error will appear within the form output.
     * @param string $field The field that ostensibly has an error that needs to
     * be displayed to the user.
     * @return string The error token corresponding to a field
     */
    public static function getErrorToken( $field ) {
        switch ( $field ) {
            case 'employer':
            case 'email':
            case 'amount':
            case 'currency':
            case 'fiscal_number':
            case 'issuer_id':
            case 'card_num':
            case 'cvv':
            case 'first_name':
            case 'last_name':
            case 'city':
            case 'street_address':
            case 'state_province':
            case 'postal_code':
            case 'expiration':
                $error_token = $field;
                break;
            default:
                $error_token = 'general';
                break;
        }
        return $error_token;
    }

    /**
     * getEmptyErrorArray
     * @deprecated
     * This only exists anymore, to make badly-coded forms happy when they start
     * pulling keys all over the place without checking to see if they're set or
     * not.
     * @return array All the possible error tokens as keys, with blank errors.
     */
    public static function getEmptyErrorArray() {
        return [
            'general' => '',
            'retryMsg' => '',
            'amount' => '',
            'card_num' => '',
            'cvv' => '',
            'fiscal_number' => '',
            'first_name' => '',
            'last_name' => '',
            'city' => '',
            'street_address' => '',
            'state_province' => '',
            'postal_code' => '',
            'email' => '',
        ];
    }

    /**
     * getError - returns the error object appropriate for a validation error
     * on the specified field, of the specified type.
     *
     * @param string $field - The common name of the field containing the data
     * that is causing the error.
     * @param string $type - The type of error being caused, from a set.
     *    Possible values are:
     *        'not_empty' - the value is required and not currently present
     *        'valid_type' - in general, the wrong format
     *        'calculated' - fields that failed some kind of multiple-field data
     * integrity check.
     *
     * @return ValidationError
     */
    public static function getError( $field, $type ) {
        // NOTE: We are just using the next bit because it's convenient.
        // getErrorToken is actually for something entirely different:
        // Figuring out where on the form the error should land.
        $token = self::getErrorToken( $field );

        // Empty messages
        if ( $type === 'not_empty' ) {
            if ( $token != 'general' ) {
                $missingErrorKey = "donate_interface-error-msg-{$token}";
                return new ValidationError( $token, $missingErrorKey );
            }
        }

        if ( $type === 'valid_type' || $type === 'calculated' ) {
            $invalidErrorKey = "donate_interface-error-msg-invalid-{$token}";
            return new ValidationError( $token, $invalidErrorKey );
        }

        // ultimate defaultness.
        return new ValidationError( $token, 'donate_interface-error-msg-general' );
    }

    /**
     * validate
     * Run all the validation rules we have defined against a (hopefully
     * normalized) DonationInterface data set.
     * @param GatewayType $gateway
     * @param array $data Normalized donation data.
     * @param array $check_not_empty An array of fields to do empty validation
     * on. If this is not populated, no fields will throw errors for being empty,
     * UNLESS they are required for a field that uses them for more complex
     * validation (the 'calculated' phase).
     * @throws BadMethodCallException
     * @return array A list of ValidationError objects, or empty on successful validation.
     */
    public static function validate( GatewayType $gateway, $data, $check_not_empty = [] ) {
        // return the array of errors that should be generated on validate.
        // just the same way you'd do it if you were a form passing the error array around.

        /**
         * We need to run the validation in an order that makes sense.
         *
         * First: If we need to validate that some things are not empty, do that.
         * Second: Do regular data type validation on things that are not empty.
         * Third: Do validation that depends on multiple fields (making sure you
         * validated that all the required fields exist on step 1).
         *
         * How about we build an array of shit to do,
         * look at it to make sure it's complete, and in order...
         * ...and do it.
         */

        // Define all default validations.
        $validations = [
            'not_empty' => [
                'currency',
                'gateway',
            ],
            'valid_type' => [
                '_cache_' => 'validate_boolean',
                'account_number' => 'validate_numeric',
                'anonymous' => 'validate_boolean',
                'contribution_tracking_id' => 'validate_numeric',
                'currency' => 'validate_alphanumeric',
                'gateway' => 'validate_alphanumeric',
                'numAttempt' => 'validate_numeric',
                'opt-in' => 'validate_boolean',
                'posted' => 'validate_boolean',
                'recurring' => 'validate_boolean',
            ],
            // Note that order matters for this group, dependencies must come first.
            'calculated' => [
                'gateway' => 'validate_gateway',
                'address' => 'validate_address',
                'city' => 'validate_address',
                'email' => 'validate_email',
                'street_address' => 'validate_address',
                'postal_code' => 'validate_address',
                'currency' => 'validate_currency_code',
                'first_name' => 'validate_name',
                'last_name' => 'validate_name',
                'name' => 'validate_name',
                'employer' => 'validate_name',
                'employer_id' => 'validate_numeric',
            ],
        ];

        // Additional fields we should check for emptiness.
        if ( $check_not_empty ) {
            $validations['not_empty'] = array_unique( array_merge(
                $check_not_empty, $validations['not_empty']
            ) );
        }

        $errors = [];
        $errored_fields = [];
        $results = [];

        foreach ( $validations as $phase => $fields ) {
            foreach ( $fields as $key => $custom ) {
                // Here we decode list vs map elements.
                if ( is_numeric( $key ) ) {
                    $field = $custom;
                    $validation_function = "validate_{$phase}";
                } else {
                    $field = $key;
                    $validation_function = $custom;
                }

                $value = $data[$field] ?? null;
                if ( !$value && $phase !== 'not_empty' ) {
                    // Skip if not required and nothing to validate.
                    continue;
                }

                // Skip if we've already determined this field group is invalid.
                $errorToken = self::getErrorToken( $field );
                if ( array_key_exists( $errorToken, $errored_fields ) ) {
                    continue;
                }

                // Prepare to call the thing.
                $callable = [ 'DataValidator', $validation_function ];
                if ( !is_callable( $callable ) ) {
                    throw new BadMethodCallException( __FUNCTION__ . " BAD PROGRAMMER. No function {$validation_function} for $field" );
                }
                $result = null;
                // Handle special cases.
                switch ( $validation_function ) {
                    case 'validate_currency_code':
                        $result = call_user_func( $callable, $value, $gateway->getCurrencies( $data ) );
                        break;
                    default:
                        $result = call_user_func( $callable, $value );
                        break;
                }

                // Store results.
                $results[$phase][$field] = $result;
                if ( $result === false ) {
                    // We did the check, and it failed.
                    $errored_fields[$errorToken] = true;
                    $errors[] = self::getError( $field, $phase );
                }
            }
        }

        return $errors;
    }

    /**
     * checkValidationPassed is a validate helper function.
     * In order to determine that we are ready to do the third stage of data
     * validation (calculated) for any given field, we need to determine that
     * all fields required to validate the original have, themselves, passed
     * validation.
     * @param array $fields An array of field names to check.
     * @param array $results Intermediate result of validation.
     * @return bool true if all fields specified in $fields passed their
     * not_empty and valid_type validation. Otherwise, false.
     */
    protected static function checkValidationPassed( $fields, $results ) {
        foreach ( $fields as $field ) {
            foreach ( $results as $phase => $results_fields ) {
                if ( array_key_exists( $field, $results_fields )
                    && $results_fields[$field] !== true
                ) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * validate_email
     * Determines if the $value passed in is a valid email address.
     * @param string $value The piece of data that is supposed to be an email
     * address.
     * @return bool True if $value is a valid email address, otherwise false.
     */
    protected static function validate_email( $value ) {
        return WmfFramework::validateEmail( $value )
            && !self::cc_number_exists_in_str( $value )
            && !self::special_characters_in_wrong_locations( $value );
    }

    protected static function validate_currency_code( $value, $acceptedCurrencies ) {
        if ( !$value ) {
            return false;
        }

        return in_array( $value, $acceptedCurrencies );
    }

    /**
     * validate_credit_card
     * Determines if the $value passed in is (possibly) a valid credit card number.
     * @param string $value The piece of data that is supposed to be a credit card number.
     * @return bool True if $value is a reasonable credit card number, otherwise false.
     */
    protected static function validate_credit_card( $value ) {
        $calculated_card_type = self::getCardType( $value );
        if ( !$calculated_card_type ) {
            return false;
        }
        return true;
    }

    /**
     * validate_boolean
     * Determines if the $value passed in is a valid boolean.
     * @param string $value The piece of data that is supposed to be a boolean.
     * @return bool True if $value is a valid boolean, otherwise false.
     */
    protected static function validate_boolean( $value ) {
        $validValues = [
            0,
            '0',
            false,
            'false',
            1,
            '1',
            true,
            'true',
        ];
        return in_array( $value, $validValues, true );
    }

    /**
     * validate_numeric
     * Determines if the $value passed in is numeric.
     * @param string $value The piece of data that is supposed to be numeric.
     * @return bool True if $value is numeric, otherwise false.
     */
    protected static function validate_numeric( $value ) {
        // instead of validating here, we should probably be doing something else entirely.
        if ( is_numeric( $value ) ) {
            return true;
        }
        return false;
    }

    /**
     * validate_gateway
     * Checks to make sure the gateway is populated with a valid and enabled
     * gateway.
     * @param string $value The value that is meant to be a gateway.
     * @return bool True if $value is a valid gateway, otherwise false
     */
    protected static function validate_gateway( $value ) {
        global $wgDonationInterfaceGatewayAdapters;

        return array_key_exists( $value, $wgDonationInterfaceGatewayAdapters );
    }

    /**
     * validate_not_empty
     * Checks to make sure that the $value is present in the $data array, and not null or an empty string.
     * Anything else that is 'falseish' is still perfectly valid to have as a data point.
     * TODO: Consider doing this in a batch.
     * @param string $value The value to check for non-emptiness.
     * @return bool True if the $value is not missing or empty, otherwise false.
     */
    protected static function validate_not_empty( $value ) {
        return ( $value !== null && $value !== '' );
    }

    /**
     * validate_alphanumeric
     * Checks to make sure the value is populated with an alphanumeric value...
     * ...which would be great, if it made sense at all.
     * TODO: This is duuuuumb. Make it do something good, or get rid of it.
     * If we can think of a way to make this useful, we should do something here.
     * @param string $value The value that is meant to be alphanumeric
     * @return bool True if $value is ANYTHING. Or not. :[
     */
    protected static function validate_alphanumeric( $value ) {
        return true;
    }

    /**
     * Validates that somebody didn't just punch in a bunch of punctuation, and
     * nothing else. Doing so for certain fields can short-circuit AVS checking
     * at some banks, and so we want to treat data like this as empty in the
     * adapter staging phase.
     * @param string $value The value to check
     * @return bool true if it's more than just punctuation, false if it is.
     */
    public static function validate_not_just_punctuation( $value ) {
        $value = html_entity_decode( $value ); // Just making sure.
        $regex = '/([\x20-\x2F]|[\x3A-\x40]|[\x5B-\x60]|[\x7B-\x7E]){' . strlen( $value ) . '}/';
        if ( preg_match( $regex, $value ) ) {
            return false;
        }
        return true;
    }

    /**
     * Some people are silly and enter their CC numbers as their name. This performs a luhn check
     * on the name to make sure it's not actually a potentially valid CC number.
     *
     * @param string $value Ze name!
     * @return bool True if the name is not suspiciously like a CC number
     */
    public static function validate_name( $value ) {
        return !self::cc_number_exists_in_str( $value ) &&
            !self::obviousXssInString( $value );
    }

    /**
     * Gets rid of numbers that pass luhn in address fields - @see validate_name
     * @param string $value
     * @return bool True if suspiciously like a CC number
     */
    public static function validate_address( $value ) {
        return !self::cc_number_exists_in_str( $value ) &&
            !self::obviousXssInString( $value );
    }

    public static function obviousXssInString( $value ) {
        return ( strpos( $value, '>' ) !== false ) ||
            ( strpos( $value, '<' ) !== false );
    }

    /**
     * Analyzes a string to see if there are special characters
     * at the beginning/end of a string or right before the @symbol
     * @param string $str
     *
     * @return bool True if special character is found at the beginning/end
     * of a string or right before the @symbol
     */
    public static function special_characters_in_wrong_locations( $str ) {
        $specialCharacterRegex = <<<EOT
/
(^[\-])|
([`!@#$%^&*()_+\-=\[\]{};':"\\|,.<>\/?~]+$)
/
EOT;
        // Transform the regex to get rid of the new lines
        $specialCharacterRegex = preg_replace( '/\s/', '', $specialCharacterRegex );
        $matches = [];
        if ( preg_match_all( $specialCharacterRegex, $str, $matches ) > 0 ) {
            return true;
        }
            return false;
    }

    /**
     * Analyzes a string to see if any credit card numbers are hiding out in it
     *
     * @param string $str
     *
     * @return bool True if a CC number was found sneaking about in the shadows
     */
    public static function cc_number_exists_in_str( string $str ): bool {
        $luhnRegex = <<<EOT
/
(?#amex)(3[47][0-9]{13})|
(?#bankcard)(5610[0-9]{12})|(56022[1-5][0-9]{10})|
(?#diners carte blanche)(300[0-5][0-9]{11})|
(?#diners intl)(36[0-9]{12})|
(?#diners US CA)(5[4-5][0-9]{14})|
(?#discover)(6011[0-9]{12})|(622[0-9]{13})|(64[4-5][0-9]{13})|(65[0-9]{14})|
(?#InstaPayment)(63[7-9][0-9]{13})|
(?#JCB)(35[2-8][0-9]{13})|
(?#Laser)(6(304|7(06|09|71))[0-9]{12,15})|
(?#Maestro)((5018|5020|5038|5893|6304|6759|6761|6762|6763|0604)[0-9]{8,15})|
(?#Mastercard)(5[1-5][0-9]{14})|
(?#Solo)((6334|6767)[0-9]{12,15})|
(?#Switch)((4903|4905|4911|4936|6333|6759)[0-9]{12,15})|((564182|633110)[0-9]{10,13})|
(?#Visa)(4([0-9]{15}|[0-9]{12}))
/
EOT;

        $nonLuhnRegex = <<<EOT
/
(?#china union pay)(62[0-9]{14,17})|
(?#diners enroute)((2014|2149)[0-9]{11})
/
EOT;

        // Transform the regex to get rid of the new lines
        $luhnRegex = preg_replace( '/\s/', '', $luhnRegex );
        $nonLuhnRegex = preg_replace( '/\s/', '', $nonLuhnRegex );

        // Remove common CC# delimiters
        $str = preg_replace( '/[\s\-]/', '', $str );

        // Now split the string on everything else and implode again so the regexen have an 'easy' time
        $str = implode( ' ', preg_split( '/[^0-9]+/', $str, PREG_SPLIT_NO_EMPTY ) );

        // First do we have any numbers that match a pattern but is not luhn checkable?
        $matches = [];
        if ( preg_match_all( $nonLuhnRegex, $str, $matches ) > 0 ) {
            return true;
        }

        // Find potential CC numbers that do luhn check and run 'em
        $matches = [];
        preg_match_all( $luhnRegex, $str, $matches );
        foreach ( $matches[0] as $candidate ) {
            if ( self::luhn_check( $candidate ) ) {
                return true;
            }
        }

        // All our checks have failed; probably doesn't contain a CC number
        return false;
    }

    /**
     * Performs a Luhn algorithm check on a string.
     *
     * @param string $str
     *
     * @return bool True if the number was valid according to the algorithm
     */
    public static function luhn_check( $str ) {
        $odd = ( strlen( $str ) % 2 );
        $sum = 0;
        $len = strlen( $str );

        for ( $i = 0; $i < $len; $i++ ) {
            if ( $odd ) {
                $sum += $str[$i];
            } else {
                if ( ( (int)$str[$i] * 2 ) > 9 ) {
                    $sum += (int)$str[$i] * 2 - 9;
                } else {
                    $sum += (int)$str[$i] * 2;
                }
            }

            $odd = !$odd;
        }
        return( ( $sum % 10 ) == 0 );
    }

    /**
     * Calculates and returns the card type for a given credit card number.
     * @param string $card_num A credit card number.
     * @return string|false 'amex', 'mc', 'visa', 'discover', or false.
     */
    public static function getCardType( $card_num ) {
        // validate that credit card number entered is correct and set the card type
        if ( preg_match( '/^3[47][0-9]{13}$/', $card_num ) ) { // american express
            return 'amex';
        } elseif ( preg_match( '/^5[1-5][0-9]{14}$/', $card_num ) ) { // mastercard
            return 'mc';
        } elseif ( preg_match( '/^4[0-9]{12}(?:[0-9]{3})?$/', $card_num ) ) {// visa
            return 'visa';
        } elseif ( preg_match( '/^6(?:011|5[0-9]{2})[0-9]{12}$/', $card_num ) ) { // discover
            return 'discover';
        } else { // an unrecognized card type was entered
            return false;
        }
    }

    /**
     * Returns a valid mediawiki language code to use for all the DonationInterface translations.
     *
     * Will only look at the currently configured language if the 'language' key
     * doesn't exist in the data set: Users may not have a language preference
     * set if we're bouncing between mediawiki instances for payments.
     * @param array $data A normalized DonationInterface data set.
     * @return string A valid mediawiki language code.
     */
    public static function guessLanguage( $data ) {
        if ( array_key_exists( 'language', $data )
            && WmfFramework::isValidBuiltInLanguageCode( $data['language'] ) ) {
            return $data['language'];
        } else {
            return WmfFramework::getLanguageCode();
        }
    }

    /**
     * Takes either an IP address, or an IP address with a CIDR block, and
     * expands it to an array containing all the relevant addresses so we can do
     * things like save the expanded list to memcache, and use in_array().
     * @param string $ip Either a single address, or a block.
     * @return array An expanded list of IP addresses denoted by $ip.
     */
    public static function expandIPBlockToArray( $ip ) {
        $parts = explode( '/', $ip );
        if ( count( $parts ) === 1 ) {
            return [ $ip ];
        } else {
            // expand that mess.
            // this next bit was stolen from php.net and smacked around some
            $corr = ( pow( 2, 32 ) - 1 ) - ( pow( 2, 32 - (int)$parts[1] ) - 1 );
            $first = ip2long( $parts[0] ) & ( $corr );
            $length = pow( 2, 32 - (int)$parts[1] ) - 1;
            $ips = [];
            for ( $i = 0; $i <= $length; $i++ ) {
                $ips[] = long2ip( $first + $i );
            }
            return $ips;
        }
    }

    /**
     * Check whether IP matches a block list
     *
     * TODO: We might want to store the expanded list in memcache.
     *
     * @param string $ip The IP addx we want to check
     * @param array $ip_list IP list to check against
     * @return bool
     */
    public static function ip_is_listed( $ip, $ip_list ) {
        $expanded = [];
        if ( !$ip_list ) {
            return false;
        }
        foreach ( $ip_list as $address ) {
            $expanded = array_merge( $expanded, self::expandIPBlockToArray( $address ) );
        }

        return in_array( $ip, $expanded, true );
    }

    /**
     * Test to determine if a value appears in a haystack. The haystack may have
     * explicit +/- rules (a - will take precedence over a +; if there is no
     * + rule, but there is a - rule everything is implicitly accepted); and may
     * also have an 'ALL' condition.
     *
     * @param mixed $needle Value, or array of values, to match
     * @param mixed $haystack Value, or array of values, that are acceptable
     * @return bool
     */
    public static function value_appears_in( $needle, $haystack ) {
        $needle = ( is_array( $needle ) ) ? $needle : [ $needle ];
        $haystack = ( is_array( $haystack ) ) ? $haystack : [ $haystack ];

        $plusCheck = array_key_exists( '+', $haystack );
        $minusCheck = array_key_exists( '-', $haystack );

        if ( $plusCheck || $minusCheck ) {
            // With +/- checks we will first explicitly deny anything in '-'
            // Then if '+' is defined accept anything there
            // but if '+' is not defined we just let everything that wasn't denied by '-' through
            // Otherwise we assume both were defined and deny everything :)

            // @phan-suppress-next-line PhanTypeMismatchDimFetch
            if ( $minusCheck && self::value_appears_in( $needle, $haystack['-'] ) ) {
                return false;
            }
            // @phan-suppress-next-line PhanTypeMismatchDimFetch
            if ( $plusCheck && self::value_appears_in( $needle, $haystack['+'] ) ) {
                return true;
            } elseif ( !$plusCheck ) {
                // Implicit acceptance
                return true;
            }
            return false;
        }

        if ( ( count( $haystack ) === 1 ) && ( in_array( 'ALL', $haystack ) ) ) {
            // If the haystack can accept anything, then whoo!
            return true;
        }

        $haystack = array_filter( $haystack, static function ( $value ) {
            return !is_array( $value );
        } );
        $result = array_intersect( $haystack, $needle );
        return (bool)$result;
    }

    /**
     * Okay, so this isn't all validation, but there's a validation
     * component in there so I'm calling it close enough.
     * @param string $value the value that should be zero-padded out to $total_length
     * @param int $total_length The fixed number of characters that $value should be padded out to
     * @return string|false The zero-padded value, or false if it was too long to work with.
     */
    public static function getZeroPaddedValue( $value, $total_length ) {
        // first, trim all leading zeroes off the value.
        $ret = ltrim( $value, '0' );

        // now, check to see if it's going to be a valid value at all,
        // and give up if it's hopeless.
        if ( strlen( $ret ) > $total_length ) {
            return false;
        }

        // ...and if we're still here, left pad with zeroes to required length
        $ret = str_pad( $ret, $total_length, '0', STR_PAD_LEFT );

        return $ret;
    }

}