bitslip6/bitfire

View on GitHub
firewall/src/botfilter.php

Summary

Maintainability
B
5 hrs
Test Coverage
<?php

/**
 * BitFire PHP based Firewall.
 * Author: BitFire (BitSlip6 company)
 * Distributed under the AGPL license: https://www.gnu.org/licenses/agpl-3.0.en.html
 * Please report issues to: https://github.com/bitslip6/bitfire/issues
 * 
 * all functions are called via api_call() from bitfire.php and all authentication 
 * is done there before calling any of these methods.
 */

namespace BitFire;

use ThreadFin\CacheItem;

use function BitFireBot\bot_authenticate;
use function BitFireBot\find_ip_as;
use function BitFireBot\is_allowed_xmlrpc;
use function BitFireBot\send_browser_verification;
use function ThreadFin\contains;
use function ThreadFin\dbg;
use function ThreadFin\decrypt_tracking_cookie;
use function ThreadFin\en_json;
use function ThreadFin\memoize;
use function ThreadFin\str_reduce;
use function ThreadFin\trace;
use function ThreadFin\debug;
use function ThreadFin\ends_with;
use function ThreadFin\HTTP\http2;
use function ThreadFin\un_json;
use function ThreadFin\utc_date;

use BitFire\Config as CFG;
use RuntimeException;
use ThreadFin\CacheStorage;
use ThreadFin\Effect;
use ThreadFin\FileData;
use ThreadFin\Maybe;
use ThreadFin\MaybeA;
use ThreadFin\MaybeBlock;
use ThreadFin\MaybeStr;

use const ThreadFin\DAY;
use const ThreadFin\HOUR;

// Longest Host header (in bytes, compared with strlen) accepted by the host header check.
const MAX_HOST_HEADER_LEN = 80;
// Result codes for user-agent / network matching.
// NOTE(review): the consumers of these codes are outside this section; the
// meanings below are inferred from the names only - confirm against callers.
const UA_NO_MATCH = -1;   // presumably: agent did not match any known bot
const UA_NET_FAIL = 0;    // presumably: agent matched but the network check failed
const UA_NET_MATCH = 1;   // presumably: agent and network both matched

/**
 * Browser detection patterns, keyed by browser name.
 *
 * Each value is a regular expression body WITHOUT delimiters. Capture group 1
 * is the browser token and capture group 2 is the version (or the device model
 * for "samsung"). Because several patterns contain "/", callers must wrap them
 * in a delimiter other than "/".
 * NOTE(review): the consumer is outside this section; iteration order may be
 * significant (e.g. "brave"/"opera" before "chrome") - keep the order as is.
 */
const AGENT_MATCH = array(
    "brave" => "(brave)/\s*(\d+\.\d+)",
    // the version separator is a literal dot (was an unescaped ".", which matched any character)
    "opera" => "(opr)/\s*(\d+\.\d+)",
    "firefox" => "(firefox)/?\s*(\d+\.\d+)",
    "samsung" => "(samsung)[\s-](SM-[a-z0-9]+)",
    "safari" => "(applewebkit)/\s*(\d+\.\d+)",
    "chrome" => "(chrome)/\s*(\d+\.\d+)",
    "android" => "(android)/?\s*([\d+\.]+)",
    "edge" => "(edge)/\s*(\d+\.\d+)",
    "explorer" => "(msie\s*|trident/)\s*([\d+\.]+)",
    "msie" => "(msie\s*|trident/[\d+\.]+;\s+rv:)\s*([\d+\.]+)",
    "vivaldi" => "(vivaldi)/\s*([\d+\.]+)",
    "yandex" => "(yabrowser)/\s*([\d+\.]+)",
);

// Words that appear in many user-agent strings. The list is ordered longest
// word first; the duplicate 'ubuntu' entry has been removed.
// NOTE(review): the consumer is outside this section - presumably these words
// are stripped or ignored when comparing agents; confirm.
const COMMON_WORDS = [ 'applewebkit', 'compatible', 'macintosh', 'mozilla', 'windows', 'version', 'android', 'ubuntu', 'mobile', 'linux', 'gecko', 'build', 'khtml', 'intel', 'mobi', 'mint', 'like', 'aft'];

// Space separated words associated with each browser family, keyed by the
// same browser names used in AGENT_MATCH.
// NOTE(review): AGENT_MATCH has an "explorer" key with no entry here - confirm
// that the consumer tolerates a missing key.
const AGENT_WORDS = [
    "chrome" => "gecko chrome safari",
    "opera" => "opera ubuntu presto symbos android chrome safari",
    "brave" => "gecko brave chrome safari",
    "firefox" => "firefox gecko chrome safari",
    "samsung" => "samsungbrowser samsung chrome safari",
    "edge" => "gecko chrome safari edge",
    "safari" => "gecko safari iphone ipad huaweilio",
    "android" => "android pyramid gecko safari sonyericssonx wildfire desire legend sensation chrome",
    "msie" => "trident gecko msie explorer safari infopath media center zune slcc chromeframe tablet",
    "vivaldi" => "vivaldi gecko chrome safari",
    "yandex" => "yabrowser yowser safari chrome"
];




/**
 * Parsed description of a request's user agent.
 * @package BitFire
 */
class UserAgent
{
    /** @var string $os operating system label */
    public $os;
    /** @var bool $whitelist whitelist flag supplied to the constructor */
    public $whitelist;
    /** @var string $browser browser name */
    public $browser;
    /** @var string $ver browser version */
    public $ver;
    /** @var bool $bot bot flag supplied to the constructor */
    public $bot;
    /** @var int $trim not set by the constructor */
    public $trim;
    /** @var int $crc32 not set by the constructor */
    public $crc32;
    /** @var int $valid validation level: 0, 1 or 2 (starts at 0) */
    public $valid = 0;

    /**
     * @param string $os operating system label
     * @param string $browser browser name
     * @param string $ver browser version
     * @param bool $whitelist initial whitelist flag
     * @param bool $bot initial bot flag
     */
    public function __construct(string $os, string $browser, string $ver, bool $whitelist, bool $bot)
    {
        // flags first, then the descriptive strings
        $this->bot = $bot;
        $this->whitelist = $whitelist;
        $this->ver = $ver;
        $this->browser = $browser;
        $this->os = $os;
    }
}

/**
 * Pairs a chunk of JavaScript source with the name of the function it defines.
 * @package BitFire
 */
class JS_Fn
{
    /** JavaScript source, stored exactly as passed in */
    public $js_code;
    /** function name, stored exactly as passed in */
    public $fn_name;

    /**
     * @param mixed $code value stored in $js_code
     * @param mixed $name value stored in $fn_name
     */
    public function __construct($code, $name)
    {
        $this->fn_name = $name;
        $this->js_code = $code;
    }
}

// Fully qualified name of the IPData class as a string constant.
// NOTE(review): usage is outside this section - presumably passed where a class name string is expected.
const IPData = '\BitFire\IPData';

/**
 * Server side state tracked for one remote IP.
 * @package BitFire
 */
class IPData
{
    public $rr;            // request counter
    public $rr_time;       // unix time at which the request counter window ends
    public $ref;           // random reference number (see make_new)
    public $ip_crc;        // integer form of the IP
    public $ua_crc;        // crc32 of the user agent
    public $ctr_404 = 0;
    public $ctr_500 = 0;
    public $valid = 0;
    public $op1 = 0;       // challenge operand 1
    public $op2 = 0;       // challenge operand 2
    public $oper = 0;      // challenge operator code
    public $ans = '';

    /**
     * @param int $ip_crc integer form of the remote IP
     * @param int $ua_crc crc32 of the user agent string
     */
    public function __construct(int $ip_crc, int $ua_crc)
    {
        $this->ua_crc = $ua_crc;
        $this->ip_crc = $ip_crc;
    }

    /**
     * Build a fresh record for an IP / agent pair: zero request count, a
     * five minute rate window starting now and a random reference number.
     */
    public static function make_new(string $ip, string $ua): IPData
    {
        $record = new IPData(\BitFireBot\ip_to_int($ip), crc32($ua));
        $record->ref = \mt_rand(0, \mt_getrandmax());
        $record->rr_time = time() + 5 * 60;
        $record->rr = 0;
        return $record;
    }
}

// Sync with server bot_info
/**
 * Plain data holder describing a known bot. todo, cleanup member variables
 *
 * Property declaration order is kept as is because it is observable when the
 * object is serialized.
 * @package BitFire
 */
class BotInfo
{
    public $id;
    public $valid;
    public $net;
    public $domain;
    public $home_page;
    public $agent;
    public $category;
    public $icon;
    public $favicon;
    public $vendor;
    public $name;
    public $hit = 0;
    public $miss = 0;
    public $not_found = 0;
    public $ips;
    public $class;
    public $country;
    public $country_code;
    public $allow;
    public $allowclass;
    public $mtime;
    public $trim;
    public $time;
    public $machine_date;
    public $machine_date2;
    public $ip_str;
    public $last_time;
    public $classClass;
    public $checked;

    /**
     * @param mixed $agent stored in $agent; the ip list starts out empty
     */
    public function __construct($agent) {
        $this->ips = [];
        $this->agent = $agent;
    }
}

/**
 * Model for a JavaScript challenge and its expected answer.
 *
 * $code holds the expression text, e.g. "(12*3)", and $ans holds the value
 * PHP computes for the same expression.
 * @package BitFire
 */
class Answer
{
    public $op1;
    public $op2;
    public $oper;
    public $ans;
    public $code;

    /**
     * @param int $op1 left operand
     * @param int $op2 right operand
     * @param int $oper operator code: 1 = multiply, 2 = divide, 3 = add,
     *                  anything else = subtract
     */
    public function __construct(int $op1, int $op2, int $oper) {
        $this->op1 = $op1;
        $this->op2 = $op2;
        $this->oper = $oper;
        switch ($oper) {
            case 1:
                $this->ans = $op1 * $op2;
                $this->code = "($op1*$op2)";
                break;
            case 2:
                // A zero divisor (e.g. a record rebuilt from missing cache data)
                // throws DivisionByZeroError on PHP 8. Produce the IEEE result
                // instead (what PHP 7 returned), which never equals an integer answer.
                if ($op2 === 0) {
                    $this->ans = ($op1 === 0) ? NAN : (($op1 > 0) ? INF : -INF);
                } else {
                    $this->ans = $op1 / $op2;
                }
                $this->code = "($op1/$op2)";
                break;
            case 3:
                $this->ans = $op1 + $op2;
                $this->code = "($op1+$op2)";
                break;
            default:
                $this->ans = $op1 - $op2;
                $this->code = "($op1-$op2)";
                break;
        }
    }

    /**
     * @return string the expected answer as a string
     */
    public function __toString(): string
    {
        return strval($this->ans);
    }
}

/**
 * A pending challenge bound to an IP / user agent pair.
 * Instances are created through Challenge::new().
 */
class Challenge
{
    public $expire_time;
    public $valid;
    public $answer;
    public $ip;
    public $ua_crc;

    /**
     * @param int $ip_int integer form of the IP
     * @param int $valid validation level
     * @param int $ua_crc crc32 of the user agent
     * @param int $exp_time lifetime in seconds, counted from now
     * @param mixed $answer expected answer
     */
    protected function __construct(int $ip_int, int $valid, int $ua_crc, int $exp_time, $answer)
    {
        $this->expire_time = time() + $exp_time;
        $this->ua_crc = $ua_crc;
        $this->answer = $answer;
        $this->valid = $valid;
        $this->ip = $ip_int;
    }

    /**
     * Factory taking the raw IP and agent strings and reducing them to integers.
     */
    public static function new(string $ip_str, int $valid, string $ua_str, int $exp_time, $answer)
    {
        $ip_int = \BitFireBot\ip_to_int($ip_str);
        $agent_crc = crc32($ua_str);
        return new Challenge($ip_int, $valid, $agent_crc, $exp_time, $answer);
    }
}


// 2 calls = 29: cpu
/**
 * Test a request against one match rule and produce a block when it matches.
 * PURE(ish) depends on Config and Exceptions to create the block
 *
 * @param int $fail_code block code used when the rule matches
 * @param MatchType $type the rule to evaluate
 * @param \BitFire\Request $request the request under inspection
 * @return MaybeBlock the new block, or Maybe::$FALSE when the rule does not match
 */
function constraint_check(int $fail_code, MatchType $type, \BitFire\Request $request): MaybeBlock
{
    if (!$type->match($request)) {
        return Maybe::$FALSE;
    }

    // block duration comes from the per-code table, 0 when the code has no entry
    $duration = FAIL_DURATION[$fail_code] ?? 0;
    return BitFire::new_block($fail_code, $type->get_field(), $type->matched_data(), $type->match_pattern(), $duration);
}

/**
 * Build the packed cache entry for an IP seen for the first time.
 * All counters start at zero and a random challenge (two operands and an
 * operator code) is stored with the entry.
 *
 * @param string $remote_addr remote IP address
 * @param string $agent user agent string
 * @return string binary record produced by pack_ip_data
 */
function new_ip_data(string $remote_addr, string $agent): string
{
    trace("new_ip");
    $first = \mt_rand(1000, 500000);
    $second = \mt_rand(12, 4000);
    $operator = \mt_rand(1, 4);
    $challenge = new Answer($first, $second, $operator);

    return pack_ip_data([
        'ip' => \BitFireBot\ip_to_int($remote_addr),
        'ua' => crc32($agent),
        'ctr_404' => 0,
        'valid' => 0,
        'ctr_500' => 0,
        'rr' => 0,
        'rrtime' => 0,
        'op1' => $challenge->op1,
        'op2' => $challenge->op2,
        'oper' => $challenge->oper,
    ]);
}

/**
 * Turn a packed ip data record into an IPData object.
 * Every field missing from the unpacked record defaults to 0.
 * PURE!
 */
function map_ip_data(string $ip_data): IPData
{
    $fields = unpack_ip_data($ip_data);
    $record = new IPData($fields['ip'] ?? 0, $fields['ua'] ?? 0);

    // IPData property => key in the unpacked record
    $property_map = [
        'ctr_404' => 'ctr_404',
        'ctr_500' => 'ctr_500',
        'rr' => 'rr',
        'rr_time' => 'rrtime',
        'valid' => 'valid',
        'ans' => 'ans',
        'op1' => 'op1',
        'op2' => 'op2',
        'oper' => 'oper',
    ];
    foreach ($property_map as $property => $key) {
        $record->$property = $fields[$key] ?? 0;
    }

    return $record;
}

/**
 * Decode a binary ip data record (see pack_ip_data) into an associative array
 * with keys: ip, ua, ctr_404, ctr_500, rr, rrtime, valid, op1, op2, oper.
 *
 * @param string $data packed record
 * @return array decoded fields, or an empty array when $data is too short to
 *               hold a full record (unpack() would return false, which
 *               violates the array return type)
 */
function unpack_ip_data(string $data): array
{
    // N(4) N(4) S(2) S(2) S(2) N(4) C(1) N(4) N(4) C(1)
    $record_len = 28;
    if (strlen($data) < $record_len) {
        return [];
    }

    $d = unpack("Nip/Nua/Sctr_404/Sctr_500/Srr/Nrrtime/Cvalid/Nop1/Nop2/Coper", $data);
    return is_array($d) ? $d : [];
}

/**
 * Encode an ip data array into its binary record form.
 *
 * @param array $ip_data must hold the keys ip, ua, ctr_404, ctr_500, rr,
 *                       rrtime, valid, op1, op2 and oper
 * @return string packed record
 */
function pack_ip_data(array $ip_data): string
{
    // field order must line up with the pack format below
    $field_order = ['ip', 'ua', 'ctr_404', 'ctr_500', 'rr', 'rrtime', 'valid', 'op1', 'op2', 'oper'];
    $values = [];
    foreach ($field_order as $field) {
        $values[] = $ip_data[$field];
    }

    return pack("NNSSSNCNNC*", ...$values);
}

/**
 * Heuristic IPv6 test: true when the address holds three or more ":".
 * PURE
 */
function is_ipv6(string $addr): bool
{
    // n colons split the string into n + 1 pieces
    $pieces = explode(':', $addr);
    return count($pieces) > 3;
}



/**
 * Reverse (PTR) lookup for an IPv4 or IPv6 address.
 *
 * Loopback addresses resolve to "localhost" without any lookup. When the
 * "dns_service" setting is "1.1.1.1" the PTR name is built here and resolved
 * through fast_ip_lookup, otherwise gethostbyaddr() is used.
 *
 * @param string $ip address to resolve (surrounding whitespace is ignored)
 * @return string the lookup result, or "" when the address is malformed or
 *                the lookup fails
 */
function reverse_ip_lookup(string $ip): string
{
    $ip = trim($ip);
    // handle localhost case
    if ($ip == "127.0.0.1" || $ip == "::1") {
        return "localhost";
    }

    if (CFG::str('dns_service', 'localhost') == "1.1.1.1") {
        $lookup_addr = "";
        if (is_ipv6($ip)) {
            // ip6.arpa names need all 32 nibbles, so the address must be
            // expanded first ("::" and dropped leading zeros) via inet_pton
            if (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6) === false) {
                debug("reverse_ip_lookup invalid ipv6 address [%s]", $ip);
                return "";
            }
            $nibbles = strrev(bin2hex(inet_pton($ip)));
            $lookup_addr = implode(".", str_split($nibbles)) . ".ip6.arpa";
        } else {
            $parts = explode('.', $ip);
            // explicit guard: assert() can be disabled in production
            if (count($parts) !== 4) {
                debug("reverse_ip_lookup invalid ipv4 address [%s]", $ip);
                return "";
            }
            $lookup_addr = "{$parts[3]}.{$parts[2]}.{$parts[1]}.{$parts[0]}.in-addr.arpa";
        }

        return fast_ip_lookup($lookup_addr, 'PTR');
    }
    $lookup = gethostbyaddr($ip);
    debug("gethostbyaddr [%s] = (%s)", $ip, $lookup);
    return ($lookup !== false) ? $lookup : "";
}

/**
 * DNS lookup, either through local resolver functions or through the
 * DNS-over-HTTPS JSON endpoint at 1.1.1.1.
 *
 * When the "dns_service" setting is "localhost", PTR queries use
 * gethostbyaddr() and every other type uses gethostbyname(). Otherwise the
 * HTTPS endpoint is queried and the "data" field of the last "Authority"
 * record (or, when there is no Authority section, the last "Answer" record)
 * is returned.
 *
 * @param string $ip name or address to query
 * @param string $type DNS record type
 * @return string the result, or empty string on any failure
 */
function ip_lookup(string $ip, string $type = "A"): string
{
    assert(in_array($type, array("A", "AAAA", "CNAME", "MX", "NS", "PTR", "SRV", "TXT", "SOA")), "invalid dns query type [$type]");
    debug("ip_lookup %s / %s", $ip, $type);
    $dns = "";
    if (CFG::str('dns_service') === 'localhost') {
        $lookup = ($type === "PTR") ? gethostbyaddr($ip) : gethostbyname($ip);
        return ($lookup !== false) ? $lookup : "";
    }
    try {
        // the name may come from an attacker controlled PTR record, so encode
        // it rather than interpolating it raw into the query string
        $url = "https://1.1.1.1/dns-query?name=" . rawurlencode($ip) . "&type=" . rawurlencode($type);
        $response = http2("GET", $url, '', ['accept' => 'application/dns-json', 'Content-Type' => 'application/dns-json']);
        $raw = $response->content;
        if ($raw !== false) {
            $formatted = un_json($raw);
            // Authority takes precedence over Answer, as before
            $records = null;
            if (isset($formatted['Authority'])) {
                $records = $formatted['Authority'];
            } else if (isset($formatted['Answer'])) {
                $records = $formatted['Answer'];
            }
            // guard the shape: end() on a non-array raises a TypeError on
            // PHP 8, which the catch below would not intercept
            if (is_array($records)) {
                $last = end($records);
                $value = is_array($last) ? ($last['data'] ?? '') : '';
                $dns = is_scalar($value) ? (string)$value : '';
            }
        }
    } catch (\Exception $e) {
        // lookups are best effort: an http failure yields an empty result
    }

    return $dns;
}

/**
 * Cached ip_lookup: results are memoized for 3600 seconds under a key built
 * from the record type and the queried name.
 * NOT PURE
 */
function fast_ip_lookup(string $ip, string $type = "A"): string {
    $cache_key = "dns_{$type}_{$ip}";
    $cached_lookup = memoize('BitFire\ip_lookup', $cache_key, 3600);
    return $cached_lookup($ip, $type);
}

/**
 * Cached reverse_ip_lookup: results are memoized for 3600 seconds per IP.
 * NOT PURE
 */
function fast_reverse_lookup(string $ip): string {
    $cache_key = "rev_{$ip}";
    $cached_lookup = memoize('BitFire\reverse_ip_lookup', $cache_key, 3600);
    return $cached_lookup($ip);
}

/**
 * Forward-confirmed reverse DNS check with memoized lookups (3600 seconds).
 *
 * Resolves the IP to a host name, resolves that name back to an address and
 * reports whether the round trip lands on the original IP.
 *
 * @param string $ip address to verify
 * @return bool true when the forward lookup of the reverse name equals $ip,
 *              false otherwise (including a failed reverse lookup)
 */
function fast_ip_verify(string $ip): bool {
    $ip_fn = memoize('gethostbyaddr', "ip_{$ip}", 3600);
    $name_fn = memoize('gethostbyname', "name_{$ip}", 3600);

    $fqdn = $ip_fn($ip);
    // gethostbyaddr yields false on malformed input: nothing to confirm
    if (!is_string($fqdn) || $fqdn === '') {
        return false;
    }
    $rip = $name_fn($fqdn);

    // always return a bool (previously fell off the end on a mismatch,
    // which violates the declared return type)
    return is_string($rip) && $rip === $ip;
}




/**
 * Load the cached record for the remote IP, bump its request counter and
 * return it as an IPData object.
 *
 * The counter restarts whenever its window has expired; a new window lasts
 * five minutes. Unknown IPs get a fresh record. The cache entry is written
 * with a 15 minute lifetime.
 */
function get_server_ip_data(string $remote_addr, string $agent): IPData {
    // called with the existing packed record
    $bump_counter = function ($data) {
        $now = time();
        $record = unpack_ip_data($data);

        // start a new request rate window when the old one has passed
        if ($record['rrtime'] < $now) {
            $record['rr'] = 0;
            $record['rrtime'] = $now + (60 * 5);
        }
        $record['rr']++;
        trace("RR:" . $record['rr']);

        return pack_ip_data($record);
    };

    // called when there is no record for this IP yet
    $create_record = function () use ($remote_addr, $agent) {
        return \BitFire\new_ip_data($remote_addr, $agent);
    };

    $packed = CacheStorage::get_instance()->update_data(
        "BITFIRE_IP_$remote_addr",
        $bump_counter,
        $create_record,
        60 * 15
    );

    return map_ip_data($packed);
}


/**
 * Bot filter: classifies the requester as browser or bot and applies the
 * request constraints, black/white listing, rate limiting, host validation
 * and the JavaScript browser challenge.
 */
class BotFilter
{

    /** @var UserAgent $browser - the parsed useragent info */
    public $browser;
    /** cache storage handed to the constructor */
    public $cache;
    public $ua_match;
    public $ua_check;

    /** IPData for the current request, filled in by inspect() */
    public $ip_data = NULL;

    /** map of fail code => MatchType rule, evaluated by inspect() */
    protected $_constraints;

    /**
     * Store the cache handle and build the static request constraints.
     */
    public function __construct(CacheStorage $cache)
    {
        $this->cache = $cache;
        // NOTE(review): FAIL_EVT_CAL uses exactly the same pattern as
        // FAIL_THRIVE_KRAKEN - looks like a copy/paste, confirm the intended pattern
        $this->_constraints = array(
            FAIL_PHP_UNIT => new MatchType(MatchType::CONTAINS, "path", '/phpunit', BLOCK_SHORT),
            FAIL_THRIVE_KRAKEN => new MatchType(MatchType::REGEX, "post_raw", '/td_option_webhook.*?kraked_url/', BLOCK_MEDIUM),
            FAIL_EVT_CAL => new MatchType(MatchType::REGEX, "post_raw", '/td_option_webhook.*?kraked_url/', BLOCK_MEDIUM),
            //FAIL_WP_ENUM => new MatchType(MatchType::CONTAINS, "path", '/wp-json/wp/v2/users', 0),
            FAIL_HONEYPOT => new MatchType(MatchType::EXACT, "path", Config::str(CONFIG_HONEYPOT, '/no_such_path'), BLOCK_MEDIUM),
            FAIL_METHOD => new MatchType(MatchType::NOTIN, "method", Config::arr(CONFIG_METHODS), BLOCK_SHORT)
        );
    }


    /**
     * inspect the UA, determine human or bot
     * perform human validation, bot white/black listing
     * 
     * CPU: 359
     * NOT PURE!
     *
     * @param \BitFire\Request $request the request under inspection
     * @return MaybeBlock the block to apply, or an empty Maybe when the request may pass
     */
    public function inspect(\BitFire\Request $request): MaybeBlock
    {
        trace("bot");
        $block = Maybe::$FALSE;
        // default to an unverified bot until the agent has been parsed
        $this->browser = new UserAgent("bot", "bot", "1.0", false, true);


        // when xmlrpc blocking is disabled, add /xmlrpc.php to the list of ignored bot urls
        if (Config::disabled("block_xmlrpc")) {
            $list = Config::arr("ignore_bot_urls");
            $list[] = "/xmlrpc.php";
            Config::set_value("ignore_bot_urls", $list);
        }



        // ignore urls that receive consistent bot access that may be difficult to identify
        if (in_array($request->path, Config::arr("ignore_bot_urls"))) {
            return $block;
        }

        // handle wp-cron and other self requested pages
        if (\BitFireBot\is_local_request($request)) {
            return $block;
        }

        
        // get details about the agent
        $this->browser = \BitFireBot\parse_agent($request->agent);

        // impure: reads and updates the cached record for this IP (bumps the request counter)
        $this->ip_data = get_server_ip_data($request->ip, $request->agent);
        //$this->ip_data = map_ip_data( new_ip_data($request->ip, $request->agent));

        // bot tracking cookie
        $maybe_bot_cookie = BitFire::get_instance()->cookie;

        // validity is the higher of the server side record and the cookie value;
        // a cookie "wp" value above 1 forces full validity
        $this->browser->valid = max($this->ip_data->valid, $maybe_bot_cookie->extract('v', 0)->value('int'));
        if ($maybe_bot_cookie->extract("wp", 0)->value("int") > 1) {
            $this->browser->valid = 2;
        }
        trace("BV" . $this->browser->valid . " SV" . $this->ip_data->valid);

        // browser has already been verified, dont need to do any further checking
        if ($this->browser->valid == 2) {
            return $block;
        }
        

        // check host header is not garbage
        $block->do_if_not('\BitFireBot\header_check', $request);

        // block constraints
        // cpu: 52
        $block->do_if_not('\ThreadFin\map_whilenot', $this->_constraints, "\BitFire\constraint_check", $request);

        // handle bots
        $this->browser->whitelist = false;

        // authenticate bots...
        if ($block->empty() && $this->browser->bot) {
            // bot blacklist
            if (Config::enabled(CONFIG_BLACKLIST_ENABLE)) {
                $block->do_if_not('\BitFireBot\blacklist_inspection', $request, file(\BitFire\WAF_ROOT . 'cache/bad-agent.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES));
            }

            // only do bot checks if not already blocked by blacklist
            if ($block->empty() && Config::enabled("whitelist_enable")) {
                if (!is_allowed_xmlrpc($request)) {
                    $effect = bot_authenticate($this->browser, $request->ip, $request->agent);
                    $effect->run();
                }
            }
        }

        
        // validate request rate, don't check for whitelist bots or admins
        if (!$this->browser->whitelist) {
            if (CFG::enabled("rate_limit")) {
                // NOTE(review): no default is passed to extract() here, unlike the "wp" read above
                $wp = $maybe_bot_cookie->extract("wp")->value('int');
                trace("RRCHK[$wp]");
                if ($wp < 2) {
                    trace("WP<2");
                    // not admin or whitelisted bot, do the rate check
                    $block->do_if_not('\BitFireBot\validate_rr', Config::int(CONFIG_RR_5M), $this->ip_data);
                } else {
                    trace("WP>1");
                }
            }
        }

        // host header is not in the list of valid domains
        if (Config::enabled(CONFIG_CHECK_DOMAIN)) {
            trace("dom");
            if (!\BitFireBot\validate_host_header(Config::arr(CONFIG_VALID_DOMAIN_LIST), $request->host)) {
                // allow valid whitelist bots to access the site
                if (!$this->browser->whitelist) {
                    // this block is returned directly and replaces any block collected above
                    $maybe = BitFire::new_block(FAIL_INVALID_DOMAIN, "host", $request->host, en_json(Config::arr(CONFIG_VALID_DOMAIN_LIST)), BLOCK_MEDIUM);
                    if (!$maybe->empty()) {
                        return $maybe;
                    }
                }
            }
        }

        // last verify real browsers, don't verify bots
        // set browser validity to cookie value or server ip data
        if (!$this->browser->bot && CFG::is_block(CONFIG_REQUIRE_BROWSER) && (CFG::enabled('cookies_enabled') || CFG::str("cache_type") != 'nop')) {
            if (!$this->browser->whitelist && !ends_with($request->path, "admin-ajax.php")) {
                $effect = $this->verify_browser($request, $maybe_bot_cookie);
                $effect->run();
            }
        }

        // on roughly 5% of requests, process the pending *.bot.txt challenge files
        if (\mt_rand(0, 100) < 5) {
            $bot_file_list = glob(BLOCK_DIR . "/*.bot.txt");
            array_map("\BitFire\bot_to_block", $bot_file_list);
        }

        return $block;
    }

    /**
     * Run the JavaScript browser challenge for a not yet verified browser.
     *
     * A request carrying the "_bfxa" marker is treated as a challenge response
     * and verified; when verification succeeds the superglobals are rewritten
     * to replay the originally intercepted request. Any other request gets the
     * challenge page.
     *
     * @param \BitFire\Request $request the current request
     * @param MaybeStr $maybe_bot_cookie the bot tracking cookie
     * @return Effect the effect for the caller to run
     */
    protected function verify_browser(\BitFire\Request $request, MaybeStr $maybe_bot_cookie)
    {
        // javascript browser challenges
        if ($this->browser->valid < 2 && Config::is_block(CONFIG_REQUIRE_BROWSER)) {
            if (isset($_POST['_bfxa']) || (strlen($request->post_raw) > 20 && contains($request->post_raw, '_bfxa'))) {
                $effect = verify_browser_effect($request, $this->ip_data, $maybe_bot_cookie);
                // IMPORTANT, even though we have a POST, we are going to impersonate the original request!
                // UGLY, move this to function
                if ($effect->read_status() == STATUS_OK) {
                    // NOTE(review): $_POST['_bfm'] is read without isset(); it may be missing
                    // when the marker was only found in the raw post body - confirm
                    $method = $_POST['_bfm'];
                    $uri = $_SERVER['REQUEST_URI'];
                    // reset the get, post and request method with the original page request values
                    // this allows us to recreate the original request that we intercepted to verify
                    // the browser runs JavaScript
                    $_SERVER['REQUEST_METHOD'] = $maybe_bot_cookie->extract('m', $method)();
                    $_GET = un_json($maybe_bot_cookie->extract('g', $_POST['_bfg'] ?? "")());
                    $_POST = un_json($maybe_bot_cookie->extract('p', $_POST['_bfp'] ?? "")());
                    // remove any possible cache busting from the browser required reload script
                    unset($_GET['_rqw']);
                    $_SERVER['REQUEST_URI'] = str_replace('_rqw=xpr', '', $uri);
                }
                return $effect;
            } else {
                return send_browser_verification($this->ip_data, $request);
            }
        } else {
            trace("valid");
        }

        // already valid, or browser verification is not enforced: nothing to do
        return new Effect(STATUS_OK);
    }
}

/**
 * impure. Converts a stale pending challenge file into a block entry and
 * deletes the file.
 *
 * The file holds the JSON of the request that was sent a JavaScript
 * challenge. Files younger than 60 seconds are left alone.
 *
 * @param string $file path to bot file
 * @return void 
 * @throws RuntimeException NOTE(review): presumably raised by BitFire::new_block - not verifiable from this section
 */
function bot_to_block(string $file)
{
    // TODO: update percent call time, and wait time (99, 1)
    $max_age_sec = 60;

    if (!file_exists($file)) {
        return;
    }
    $modified = filemtime($file);
    // only act on files older than 60 seconds (the previous check,
    // mtime < time() + 1, was true for every file)
    if ($modified === false || $modified >= (time() - $max_age_sec)) {
        return;
    }

    // ugly hack to recreate the original request
    /** @var Request $request */
    $tmp = un_json(file_get_contents($file));
    $request = new Request();
    $request->agent = $tmp["agent"] ?? '?';
    $request->ip = $tmp["ip"] ?? '?';
    $request->method = $tmp["method"] ?? "?";
    $request->scheme = $tmp["scheme"] ?? "?";
    $request->path = $tmp["path"] ?? '/';
    $request->host = $tmp["host"] ?? '';
    $request->get = $tmp["get"] ?? [];
    $request->post = $tmp["post"] ?? [];

    // DNS lookups are slow, so reverse dns is deferred to the dashboard page
    // TODO: add reverse dns lookup to dashboard
    BitFire::new_block(FAIL_FAKE_BROWSER, $request->agent, "DEFER:reverse_dns", "did not complete JavaScript challenge", 0, $request);
    debug("agent: \"%s\", ip: %s", $request->agent, $request->ip);
    unlink($file);
}

/**
 * Build the cache item that adds one to metric $stat in the bucket named
 * after utc_date('G'). The item is kept for DAY seconds.
 * @test test_bot_metric_inc
 * PURE !
 */
function bot_metric_inc(string $stat): CacheItem
{
    $bucket_key = 'metrics-' . utc_date('G');

    // add one to the counter, treating a missing counter as zero
    $increment = function ($data) use ($stat) {
        $data[$stat] = ($data[$stat] ?? 0) + 1;
        return $data;
    };

    // initial value for a bucket that does not exist yet
    $initial = function () {
        return BITFIRE_METRICS_INIT;
    };

    return new CacheItem($bucket_key, $increment, $initial, DAY);
}


/**
 * Build the effect that wipes server and client state for this visitor and
 * sends the browser back to the requested path.
 *
 * The effect empties the tracking cookie, overwrites the cached IP record
 * with an empty value using a negative lifetime, counts a 'broken' metric,
 * asks the browser to clear site data and exits after sending the headers.
 */
function browser_clear(\BitFire\Request $request): Effect
{
    $cache_key = "BITFIRE_IP_" . $request->ip;
    trace("BRCLR");

    // both callbacks yield an empty record
    $empty_record = new CacheItem($cache_key, function ($x) {
        return '';
    }, function () {
        return '';
    }, -DAY);

    $effect = Effect::new()->cookie('', "browser_clear");
    $effect = $effect->update($empty_record);
    $effect = $effect->update(bot_metric_inc('broken'));
    $effect = $effect->header("Clear-Site-Data", "\"cookies\", \"executionContexts\"");
    $effect = $effect->header("Location", $request->path);
    $effect = $effect->status(STATUS_SERVER_STATE_FAIL);
    return $effect->exit(true);
}

/**
 * Check a JavaScript challenge response against the expected answer and
 * build the resulting effect.
 *
 * The expected answer is taken from the tracking cookie and recomputed from
 * the server side IP record; the response passes when the submitted "_bfa"
 * value matches either one.
 * @test test_verify_browser
 * PURE! 
 *
 * @param \BitFire\Request $request the challenge response request
 * @param IPData $ip_data server side record holding the challenge operands
 * @param MaybeA $cookie the decoded tracking cookie
 * @return Effect STATUS_OK effect marking the browser valid on success, or a
 *                state clearing effect otherwise
 */
function verify_browser_effect(\BitFire\Request $request, IPData $ip_data, MaybeA $cookie): Effect
{

    $effect = Effect::new();
    // user manually refreshed the page, lets clear as much server state as we can and try to reload the original page
    // (parenthesized: "===" binds tighter than "??", so the unparenthesized
    // form fired for ANY truthy _rqw value)
    if (($request->get['_rqw'] ?? '') === 'xpr') {
        trace("CLR");
        return browser_clear($request);
    }

    trace("VRFY");
    $answer = new Answer($ip_data->op1, $ip_data->op2, $ip_data->oper);
    $cookie_answer = $cookie->extract('a')->extract('ans');

    // test verify browser needs to pass the tests...

    // default so $bfa is always defined, even when no answer was posted
    $bfa = 0;
    if (strlen($request->post_raw) > 20) {
        $tmp = urldecode($request->post_raw);
        $result2 = json_decode($tmp, true);
        // json_decode may return a scalar for bodies like "123...": count()
        // on a non-array is a TypeError on PHP 8, so test the type first
        if (!is_array($result2) || count($result2) < 3) {
            parse_str($tmp, $result2);
        }
        $bfa = $result2["_bfa"] ?? 0;
    } else if (isset($request->post["_bfa"])) {
        $bfa = $request->post["_bfa"];
    }
    // a nested value (e.g. "_bfa[]=1") is never a valid answer
    if (!is_scalar($bfa)) {
        $bfa = 0;
    }
    debug("x-valid-answer cookie: (%s) server: (%s) / %s", $cookie_answer(), $answer->ans, $bfa);

    // unable to read correct answer from ip_data or cookie
    // lets clear as much server state as we can and try to reload the original page
    if ($cookie_answer->value('int') == 0) {
        return browser_clear($request);
    }


    // correct answer
    if ($cookie_answer->value('int') === intval($bfa) || $answer->ans == intval($bfa)) {
        debug("x-challenge: pass");

        // update the browser valid state!
        BitFire::get_instance()->bot_filter->browser->valid = 2;

        // increase metric counter
        //$effect->update(bot_metric_inc('valid'))
        $effect->status(STATUS_OK)
            // set the response valid cookie
            ->cookie(en_json(array('ip' => crc32($request->ip), 'v' => 2, 'ua' => crc32($request->agent), 'et' => time() + 86400, 'wp' => $cookie->extract('wp')->value('int'))), "botfilter_verify")
            // update the ip_data valid state for 60 minutes, TODO: make this real func, not anon-func
            ->update(new CacheItem(
                'BITFIRE_IP_' . $request->ip,
                function ($data) {
                    $ip_data = unpack_ip_data($data);
                    $ip_data['valid'] = 2;
                    return pack_ip_data($ip_data);
                },
                function () use ($request) {
                    return \BitFire\new_ip_data($request->ip, $request->agent);
                },
                HOUR
            ));
    }
    // incorrect answer: TODO: if this is a POST, then we need to redirect BACK to a GET so that if the user
    // refreshes the page, they don't POST again the wrong data...
    else {
        // log the answer that was actually evaluated
        debug("x-challenge: fail [%d] / [%d]", $cookie_answer->value('int'), intval($bfa));
        $effect = browser_clear($request);
        $effect->out(file_get_contents(\BitFire\WAF_ROOT . "views/browser_required.html"))
            ->header("Clear-Site-Data", "\"cookies\", \"executionContexts\"")
            ->status(STATUS_FAIL)
            ->exit(true);
    }

    return $effect;
}

namespace BitFireBot;

use BitFire\Answer;
use BitFire\BitFire;
use BitFire\BotInfo;
use BitFire\Config;
use BitFire\Config as CFG;
use BitFire\JS_Fn;
use BitFire\Request;
use BitFire\UserAgent;
use RuntimeException;
use ThreadFin\CacheItem;
use ThreadFin\Effect;
use ThreadFin\FileData;
use ThreadFin\FileMod;
use ThreadFin\Maybe;
use ThreadFin\MaybeA;
use ThreadFin\MaybeBlock;
use ThreadFin\MaybeStr;

use function BitFire\block_now;
use function BitFire\is_ipv6;
use function BitFire\Pure\json_to_file_effect;
use function BitFireSvr\add_ini_value;
use function ThreadFin\array_shuffle;
use function ThreadFin\cache_prevent;
use function ThreadFin\contains;
use function ThreadFin\dbg;
use function ThreadFin\decrypt_tracking_cookie;
use function ThreadFin\each_character;
use function ThreadFin\ends_with;
use function ThreadFin\find_regex_reduced;
use function ThreadFin\in_array_ending;
use function ThreadFin\is_regex_reduced;
use function ThreadFin\memoize;
use function ThreadFin\debug;
use function ThreadFin\en_json;
use function ThreadFin\get_hidden_file;
use function ThreadFin\HTTP\http2;
use function ThreadFin\partial as BINDL;
use function ThreadFin\partial_right as BINDR;
use function ThreadFin\random_str;
use function ThreadFin\trace;
use function ThreadFin\utc_date;

use const BitFire\AGENT_MATCH;
use const BitFire\AGENT_WORDS;
use const BitFire\APP;
use const BitFire\BITFIRE_METRICS_INIT;
use const BitFire\BLOCK_DIR;
use const BitFire\COMMON_WORDS;
use const BitFire\CONFIG_ENCRYPT_KEY;
use const BitFire\CONFIG_REQUIRE_BROWSER;
use const BitFire\CONFIG_USER_TRACK_COOKIE;
use const BitFire\DS;
use const BitFire\FAIL_FAKE_WHITELIST;
use const BitFire\FAIL_HOST_TOO_LONG;
use const BitFire\FAIL_IS_BLACKLIST;
use const BitFire\FAIL_MISS_WHITELIST;
use const BitFire\FAIL_RR_TOO_HIGH;
use const BitFire\MAX_HOST_HEADER_LEN;
use const BitFire\UA_NET_FAIL;
use const BitFire\UA_NET_MATCH;
use const BitFire\UA_NO_MATCH;
use const BitFire\WAF_ROOT;
use const BitFire\WAF_SRC;
use const ThreadFin\DAY;

/**
 * Reject requests whose Host header is longer than MAX_HOST_HEADER_LEN bytes.
 * The block duration is read from the "short_block_time" setting (600 when unset).
 * @test test_bot.php test_header_check
 * PURE!
 */
function header_check(\BitFire\Request $request): MaybeBlock
{
    $host_len = strlen($request->host);
    if ($host_len <= MAX_HOST_HEADER_LEN) {
        return Maybe::$FALSE;
    }

    $duration = CFG::int("short_block_time", 600);
    return BitFire::new_block(FAIL_HOST_TOO_LONG, "HTTP_HOST", $request->host, 'len < 80', $duration);
}


/**
 * Reduce an IP address string to an integer using crc32.
 * Distinct addresses can collide; this is unlikely to be significant.
 * @test test_bot.php test_ip_to_int
 * PURE
 */
function ip_to_int(string $ip): int
{
    $checksum = crc32($ip);
    return $checksum;
}

/**
 * return true if the request is from the local server
 * NOT PURE: reads $_SERVER and the configuration
 *
 * A request is local when its IP equals the server address or a loopback
 * address, or when its agent identifies as this site's WordPress install
 * (contains "wordpress/<wp_version>" and the request host) and it either
 * shares the server's first three dotted octets or targets wp-cron.php,
 * admin-ajax.php or a path shorter than 2 characters.
 * NOTE(review): the agent based rule relies on the client supplied
 * User-Agent header - confirm this trust level is intended.
 */
function is_local_request(\BitFire\Request $request): bool {

    if (isset($_SERVER['SERVER_ADDR'])) {
        // accept the IPv4-mapped IPv6 spelling of an address
        $remote_ip = (stripos($request->ip, '::ffff:') === 0) ? substr($request->ip, 7) : $request->ip;
        // exact comparison: a substring test would treat e.g. 11.2.3.4 or
        // 1.2.3.45 as the server address 1.2.3.4
        if (in_array($remote_ip, [$_SERVER['SERVER_ADDR'], '127.0.0.1', '::1'], true)) {
            return true;
        }
    }

    // source agent is localhost
    // can probably remove this after completing above TODO
    if (
        strstr($request->agent, 'wordpress/' . CFG::str('wp_version')) !== false &&
        strstr($request->agent, $request->host) !== false
    ) {
        // some hosts will route this through their local gw:
        // compare the first three dotted octets of both addresses
        $ip1 = explode(".", $request->ip);
        $ip2 = explode(".", $_SERVER['SERVER_ADDR']??'127.0.0.1');
        $ip1b = array_slice($ip1, 0, 3);
        $ip2b = array_slice($ip2, 0, 3);
        $ip1c = join(".", $ip1b);
        $ip2c = join(".", $ip2b);
        if ($ip1c == $ip2c) {
            return true;
        }
        if (
            ends_with($request->path, 'wp-cron.php')
            || (strlen($request->path) < 2)
            || ends_with($request->path, 'admin-ajax.php')
        ) {
            return true;
        }
        return false;
    }

    return false;
}

/**
 * Block the request when the IP's request count is above the limit.
 * Only the first request over the limit is reported; later ones get
 * skip_reporting set on the block.
 * @test test_bot.php test_validate_rr
 * PURE!
 */
function validate_rr(int $rr_5m_limit, \BitFire\IPData $ip_data): MaybeBlock
{
    $within_limit = !($ip_data->rr > $rr_5m_limit);
    if ($within_limit) {
        return Maybe::$FALSE;
    }

    $duration = CFG::int("short_block_time", 600);
    $block = BitFire::new_block(FAIL_RR_TOO_HIGH, 'REQUEST_RATE', "request rate: " . $ip_data->rr, "request rate limit: $rr_5m_limit", $duration);

    $already_reported = $ip_data->rr > $rr_5m_limit + 1;
    if ($already_reported) {
        $block->do(function ($x) {
            $x->skip_reporting = true;
            return $x;
        });
    }

    return $block;
}

/**
 * Decide whether $remote_ip belongs to the network(s) in $network_regex.
 *
 * $network_regex is a single pattern or a comma separated list of patterns.
 * The IP itself is tested first; otherwise the reverse lookup result is
 * matched against the patterns and the match is resolved forward again, which
 * must give back $remote_ip.
 * depends on ip lookup 
 * 
 * NOT PURE!
 */
function verify_bot_ip(string $remote_ip, string $network_regex): bool
{
    // split a comma separated list, otherwise use the single pattern
    if (strpos($network_regex, ',') > 0) {
        $networks = explode(',', $network_regex);
    } else {
        $networks = array($network_regex);
    }

    // every pattern may match anywhere in the subject
    $ip_checks = [];
    foreach ($networks as $network) {
        $ip_checks[] = ".*{$network}";
    }

    // direct match of the IP against the patterns
    if (array_reduce($ip_checks, is_regex_reduced($remote_ip), false)) {
        return true;
    }

    // fwd and reverse lookup
    $reverse_name = \ThreadFin\MaybeStr::of(\BitFire\reverse_ip_lookup($remote_ip));
    $matched_name = $reverse_name->then(function ($host) use ($ip_checks) {
        return array_reduce($ip_checks, find_regex_reduced($host), NULL);
    });
    $forward_ip = $matched_name->then('BitFire\fast_ip_lookup');

    return $forward_ip() === $remote_ip;
}

/**
 * Reducer friendly, memoized wrapper around verify_bot_as().
 *
 * Once $carry is true no further lookup is made. The lookup result is
 * memoized for 3600 under the key "<network>_<remote_ip>".
 *
 * @test test_bot.php test_memoization_verify_bot_as
 * NOT PURE
 */
function fast_verify_bot_as(string $remote_ip, bool $carry, string $network): bool
{
    if (!$carry) {
        $cached_verify = memoize('\BitFireBot\verify_bot_as', "{$network}_{$remote_ip}", 3600);
        $carry = ($cached_verify($remote_ip, $network) === "yes");
    }

    return $carry;
}

/**
 * TODO: add CIDR notation here...
 *
 * Query whois.radb.net for $remote_ip and report whether the response
 * contains $network (case-insensitive).
 *
 * @test test_bot.php test__verify_bot_as
 * @return string "yes" or "no", (load_or_cache does not support bool types)
 * NOT PURE
 */
function verify_bot_as(string $remote_ip, string $network): string
{
    // 1 second connect timeout, error number / message are not inspected
    $socket = fsockopen("whois.radb.net", 43, $no, $str, 1);

    $whois = MaybeA::of($socket)
        ->effect(BINDR('\fputs', "$remote_ip\r\n"))
        ->then('\ThreadFin\read_stream')
        ->keep_if(BINDR('stristr', $network));

    if ($whois->empty()) {
        return "no";
    }
    return "yes";
}

/**
 * Look up the AS number of the remote IP via whois.radb.net.
 *
 * @param string $remote_ip
 * @return string the digits of the first "AS<number>" in the whois
 *   response, or an empty string when none is found
 */
function find_ip_as(string $remote_ip): string
{
    $socket = fsockopen("whois.radb.net", 43, $no, $str, 1);

    $query = MaybeStr::of($socket)
        ->effect(BINDR('\fputs', "$remote_ip\r\n"))
        ->then('\ThreadFin\read_stream');
    $whois_text = $query();

    return preg_match("/AS([0-9]+)/", $whois_text, $found) ? $found[1] : "";
}


/**
 * Test whether $remote_ip falls inside any of the CIDR routes in $routes.
 * Used in ip cache creation.
 *
 * Works for IPv4 and IPv6. Both the address and each route are converted
 * to their packed binary form and compared prefix-first, so a route only
 * matches an address of the same family.
 *
 * Routes that are not well formed "address/prefix-length" strings (empty
 * strings, missing or non numeric mask, mask longer than the address,
 * unparsable address) are skipped and never match.
 *
 * @param string $remote_ip the address to test
 * @param array $routes list of CIDR strings, eg "10.0.0.0/8", "2001:db8::/32"
 * @return bool true if at least one route contains $remote_ip
 */
function is_ip_in_cidr_list(string $remote_ip, array $routes): bool
{
    $ip_bin = @inet_pton($remote_ip);
    if ($ip_bin === false) {
        return false;
    }
    // 4 bytes for IPv4, 16 bytes for IPv6
    $ip_len = strlen($ip_bin);

    foreach ($routes as $route) {
        if (!is_string($route) || strpos($route, '/') === false) {
            continue;
        }
        [$route_ip, $netmask] = explode('/', $route, 2);
        $netmask = trim($netmask);
        if (preg_match('/^\d{1,3}$/', $netmask) !== 1) {
            continue;
        }
        $bits = intval($netmask);

        $route_bin = @inet_pton(trim($route_ip));
        // skip unparsable routes, routes of the other address family and
        // prefix lengths longer than the address
        if ($route_bin === false || strlen($route_bin) !== $ip_len || $bits > $ip_len * 8) {
            continue;
        }

        // compare the whole bytes covered by the prefix ...
        $full_bytes = intdiv($bits, 8);
        if ($full_bytes > 0 && strncmp($ip_bin, $route_bin, $full_bytes) !== 0) {
            continue;
        }
        // ... then the remaining high bits of the next byte
        $extra_bits = $bits % 8;
        if ($extra_bits > 0) {
            $mask = (0xff << (8 - $extra_bits)) & 0xff;
            if ((ord($ip_bin[$full_bytes]) & $mask) !== (ord($route_bin[$full_bytes]) & $mask)) {
                continue;
            }
        }

        return true;
    }

    return false;
}

/**
 * Split whois route lookup output into lines and run every line through
 * parse_whois_line(). Lines without a ": " separator yield empty strings,
 * the result has one entry per input line.
 * PURE!
 */
function parse_whois_route(string $output): ?array
{
    $routes = array();
    foreach (explode("\n", $output) as $line) {
        $routes[] = \BitFireBot\parse_whois_line($line);
    }
    return $routes;
}

/**
 * Extract the value of a whois "key: value" line, for example
 * 'route    : 1.2.3.4/24' becomes '1.2.3.4/24'.
 * The value is the text between the first and second ": " separators,
 * trimmed. A line without a separator gives an empty string.
 * PURE!
 */
function parse_whois_line(string $line): string
{
    $fields = explode(": ", $line, 3);
    if (!isset($fields[1])) {
        return '';
    }
    return trim($fields[1]);
}

/**
 * Validate the request host against the configured list of valid domains.
 * An empty list accepts every host, otherwise the result of
 * in_array_ending($valid_domains, $host) is returned.
 * PURE!
 */
function validate_host_header(array $valid_domains, string $host): bool
{
    if (empty($valid_domains)) {
        return true;
    }
    return in_array_ending($valid_domains, $host);
}

/**
 * Test if an agent is found in a list of agents.
 *
 * $list maps an agent match string (a substring of the agent, or
 * "crc<crc32 of agent>") to a network check: "*" accepts any network,
 * "discover" looks up and stores the AS of the current IP, a value starting
 * with "AS" is a comma separated list of AS networks, anything else is
 * passed to verify_bot_ip() as the reverse lookup pattern.
 *
 * Only the first matching list entry is evaluated.
 *
 * @return int UA_NO_MATCH no agent match, UA_NET_FAIL agent matched but the
 *   network did not, UA_NET_MATCH agent and network matched
 * NOT PURE! depends on DNS and WHOIS
 */
function agent_in_list(string $agent, string $ip, array $list): int
{
    // empty and single character agents never match
    if (strlen($agent) <= 1) {
        return UA_NO_MATCH;
    }

    $agent_crc = "crc" . crc32($agent);
    foreach ($list as $pattern => $network) {
        assert(is_string($pattern), "agent list must be only string values");
        assert(!empty($pattern), "agent list must be only string values");

        debug("agent_in_list: $agent ($agent_crc) vs $pattern ($network)");

        $agent_matches = strpos($agent, $pattern) !== false || $agent_crc == $pattern;
        if (!$agent_matches) {
            continue;
        }
        if ($network === "*") {
            return UA_NET_MATCH;
        }

        // remember which entry matched and what it is checked against
        $bot_filter = BitFire::get_instance()->bot_filter;
        $bot_filter->ua_match = $pattern;
        $bot_filter->ua_check = $network;

        // handle auto-discover whitelist AS
        if ($network === "discover") {
            debug("  ! -> DISCOVER AS");
            $as = find_ip_as($ip);
            if (!empty($as)) {
                include_once \BitFire\WAF_SRC . "/server.php";
                // TODO: replace with update_ini_fn(,,true)
                add_ini_value("botwhitelist[$agent]", "AS{$as}", "discover", WAF_ROOT . "/cache/whitelist_agents.ini");
                return UA_NET_MATCH;
            }
        }

        debug("substr $network, 0, 2) == AS");
        // AS network check (can check multiple AS networks), or reverse lookup
        if (substr($network, 0, 2) == "AS") {
            $verified = array_reduce(explode(',', $network), BINDL('\BitFireBot\fast_verify_bot_as', $ip), false);
        } else {
            $verified = \BitFireBot\verify_bot_ip($ip, $network);
        }

        if ($verified) {
            return UA_NET_MATCH;
        }
        return UA_NET_FAIL;
    }

    // nothing in the list matched the agent
    return UA_NO_MATCH;
}

/**
 * Check the agent against the bot whitelist.
 *
 * Returns UA_NO_MATCH when there is no whitelist or no agent, otherwise the
 * result of agent_in_list() for the whitelist. The $bot parameter is
 * currently not used.
 *
 * NOT PURE: depends on external dns and whois
 */
function whitelist_inspection(string $agent, string $ip, ?array $whitelist, bool $bot = true): int
{
    // nothing to compare
    if (empty($whitelist) || empty($agent)) {
        return UA_NO_MATCH;
    }

    // handle whitelisting (the most restrictive)
    return agent_in_list($agent, $ip, $whitelist);
}

/**
 * Match the request agent against the blacklist (CONTAINS match).
 *
 * Returns a FAIL_IS_BLACKLIST block when the agent matches, Maybe::$FALSE
 * otherwise. The block time is the "block_medium_time" config value
 * (default 3600).
 *
 * @test test_bot.php test_blacklist_inspection
 */
function blacklist_inspection(\BitFire\Request $request, ?array $blacklist): MaybeBlock
{
    trace("BLKCHK");
    $matcher = new \BitFire\MatchType(\BitFire\MatchType::CONTAINS, "agent", $blacklist, CFG::int("block_medium_time", 3600));

    // not blacklisted
    if ($matcher->match($request) === false) {
        return Maybe::$FALSE;
    }

    return BitFire::new_block(
        FAIL_IS_BLACKLIST,
        "user_agent",
        $request->agent,
        $matcher->match_pattern(),
        CFG::int("block_medium_time", 3600)
    );
}


/**
 * Parse a user agent string into a UserAgent (os, browser, ver, bot flag,
 * plus the normalised "trim" text and its crc32).
 *
 * Steps:
 *  1. normalise the agent to lowercase alpha words (trim / crc32)
 *  2. take the first version-looking number after the 11th character as ver
 *  3. anything not starting with "mozilla/5.0" is returned as is
 *  4. otherwise detect the OS and try every AGENT_MATCH pattern; a pattern
 *     only counts as a browser when the agent has no unknown words of more
 *     than 3 characters
 *  5. agents still flagged as bot are checked against cache/browsers.txt
 *
 * The "mozilla/5.0" and OS checks are case sensitive on $user_agent -
 * presumably callers pass an already lower cased agent, TODO confirm.
 *
 * Not pure: step 5 reads cache/browsers.txt from disk.
 *
 * @test test_bot.php test_parse_agent
 * @return UserAgent
 */
function parse_agent(string $user_agent): UserAgent {
    // the later `if ($carry->bot)` check relies on this object starting out
    // flagged as a bot - presumably the last constructor argument, TODO confirm
    $agent = new UserAgent('bot', $user_agent, "x", false, true);

    // remove anything that is not alpha
    $agent_min1 = preg_replace("/[^a-z\s]+/", " ", strtolower(trim($user_agent)));
    // remove short words, then collapse runs of whitespace
    // NOTE(review): "{1-3}" is not a PCRE quantifier (that would be "{1,3}"),
    // it is matched as literal text. Changing it would change trim / crc32
    // for existing agents, so it is left as is.
    $agent_min2 = preg_replace("/\s+/", " ", preg_replace("/\s[a-z]{1,3}\s([a-z]{1-3}\s)?/", " ", $agent_min1));
    $agent->trim = substr($agent_min2, 0, 250);
    // crc32 is taken over the full normalised text, trim is cut at 250 chars
    $agent->crc32 = crc32($agent_min2);
    // first "digits.digits[.digits]" after the first 11 characters
    if (preg_match("!\d+\.\d+\.?\d*!", substr($user_agent,11), $matches)) {
        $agent->ver = $matches[0];
    }

    // return robots immediately...
    if (substr($user_agent, 0, 11) !== "mozilla/5.0") {
        return $agent;
    }

    // remove common browser words, only work on what is unique
    $rem_fn = function ($carry, $item) {
        return str_replace($item, "", $carry);
    };
    // array_filter drops the empty strings left behind by the removals
    $agent_min_words = array_filter(explode(" ", array_reduce(COMMON_WORDS, $rem_fn, $agent_min2)));

    // Identify the browser OS, the last entry of $os_list found in the agent wins
    $os_list = array("blackberry", "ipad", "iphone", "linux", "android", "os x", "windows", "blackberry", "samsung");
    $agent->os = array_reduce($os_list, function (string $carry, string $os) use ($user_agent) {
        return (strpos($user_agent, $os) !== false) ? $os : $carry;
    }, "bot");


    // find a matching browser, $agent is the carry and is modified in place
    $parsed_agent = array_reduce(array_keys(AGENT_MATCH), function (\BitFire\UserAgent $carry, string $match_key) use ($user_agent, $agent_min_words) {
        // only check if we have not found a browser yet
        if ($carry->bot) {
            // check if the agent matches the regex
            $pattern = AGENT_MATCH[$match_key]??'no_such_agent';
            if (preg_match("!$pattern!", $user_agent, $matches)) {

                // check if the agent has any words not in the list of words for the browser
                $misses = array_diff($agent_min_words, explode(" ", AGENT_WORDS[$match_key]??$match_key));
                // remove any found words that are less than 4 characters, 
                // this allows for small version differences
                $important_words = array_filter($misses, function ($word) {
                    return (strlen(trim($word)) > 3);
                });

                // if we don't have any unknown extra words then we have a browser match
                if (count($important_words) == 0) {
                    $carry->browser = $match_key;
                    if (!empty($matches[2])) { $carry->ver = $matches[2]??"?.?"; }
                    $carry->bot = false;
                } else {
                    // still a bot: remember the unknown words as the browser name
                    $carry->browser = join(" ", $important_words);
                }
            }
        }
        return $carry;
    }, $agent);

    // make sure it's not really a browser...
    if ($parsed_agent->bot) {
        $words = explode(" ", $parsed_agent->browser);
        // if we have a bot with lots of junk in the UA, don't even bother checking...
        $num_words = count($words);
        if ($num_words > 0 && $num_words < 5) {
            // any word listed in browsers.txt marks the agent as a browser
            $weird_agents = FileData::new(WAF_ROOT."cache/browsers.txt")->read(false)->lines;
            foreach ($words as $word) {
                if (in_array($word, $weird_agents)) {
                    $parsed_agent->bot = false;
                }
            }
        }
    }

    return $parsed_agent;
}


/**
 * Read the user tracking cookie (name taken from Config) out of $_COOKIE
 * and decrypt it with the configured encryption key.
 * The ip and agent are passed on to decrypt_tracking_cookie() to validate
 * the cookie. A missing cookie is decrypted as an empty string.
 */
function get_tracking_cookie(string $ip, string $agent): MaybeA
{
    $cookie_name = Config::str(CONFIG_USER_TRACK_COOKIE);
    $raw_cookie = $_COOKIE[$cookie_name] ?? '';
    $encrypt_key = Config::str(CONFIG_ENCRYPT_KEY);

    return decrypt_tracking_cookie($raw_cookie, $encrypt_key, $ip, $agent);
}


/**
 * Build a generator for JavaScript call expressions: the returned closure
 * takes an argument and gives back the text "<fn_name>(<argument>)".
 * @test test_bot.php test_js_fn
 * PURE !
 */
function js_fn(string $fn_name): callable
{
    $make_call = function ($arg) use ($fn_name) {
        return $fn_name . "(" . $arg . ")";
    };
    return $make_call;
}

/**
 * Create an obfuscated JavaScript function that returns $number.
 *
 * The generated function holds a shuffled dictionary of the printable ascii
 * characters (32-126) and rebuilds the number by concatenating one
 * dictionary.charAt(index) lookup per character of the number before
 * passing the result to parseInt. Dictionary order, dictionary name and
 * function name are random, so the output differs on every call.
 *
 * @test test_bot.php test_js_int_obfuscate
 */
function js_int_obfuscate(int $number): JS_Fn
{
    // printable ascii characters in random order
    $printable = array();
    foreach (range(32, 126) as $ascii) {
        $printable[] = chr($ascii);
    }
    $dictionary = join('', array_shuffle($printable));

    $digits = strval($number);
    $dict_name = 'z' . random_str(3);
    $fn_name = 'x' . random_str(3);
    // produces "+<dict_name>.charAt(<index>)"
    $char_at = js_fn("+{$dict_name}.charAt");

    // one dictionary lookup for every character position of the number
    $to_lookup = function (string $char, int $position) use ($dictionary, $digits, $char_at): string {
        return $char_at(strpos($dictionary, $digits[$position]));
    };
    $code = each_character($digits, $to_lookup);

    // the actual js function
    $js_code = sprintf("function %s(){let %s='%s';return parseInt(''%s);}", $fn_name, $dict_name, addslashes($dictionary), $code);
    return new JS_Fn($js_code, $fn_name);
}


/**
 * Make the JavaScript browser challenge.
 *
 * The script contains:
 *  - a randomly named function that returns "<op1> <operator> <op2>", the
 *    operands being produced by obfuscated helper functions
 *  - two additional obfuscated helpers for random numbers that the
 *    challenge itself never calls (decoys)
 *  - bfxa(), which POSTs its argument to "/" as application/json
 *  - a payload object (challenge answer, current $_GET / $_POST, request
 *    method, generation time) that is handed to the function named $fn
 *
 * Not pure: reads $_SERVER, $_GET and $_POST and uses random names.
 *
 * @param int $op1 first operand of the challenge
 * @param int $op2 second operand of the challenge
 * @param int $oper operation id, see oper_char()
 * @param string $fn name of the JavaScript function that receives the
 *   payload. For "bfxa" the payload is JSON.stringify()'d first.
 * @return string the JavaScript source, without <script> tags
 */
function make_js_script(int $op1, int $op2, int $oper, string $fn = "xhr"): string
{
    $fn1_name = '_0x' . random_str(4);
    $fn3 = js_int_obfuscate($op1);
    $fn4 = js_int_obfuscate($op2);
    // decoys, emitted in the script but not used by the challenge
    $fn5 = js_int_obfuscate(mt_rand(1000, 500000));
    $fn6 = js_int_obfuscate(mt_rand(1000, 500000));
    // not set when running outside of a web request
    $method = $_SERVER["REQUEST_METHOD"] ?? 'GET';

    // the challenge function
    $js  = "function $fn1_name(){var _0x29a513=function(){var _0x4619fc=!![];return function(_0x579b4a,_0x4b417a){var _0x13068=_0x4619fc?function(){if(_0x4b417a){var _0x193a80=_0x4b417a['apply'](_0x579b4a,arguments);_0x4b417a=null;return _0x193a80;}}:function(){};_0x4619fc=![];return _0x13068;};}();var _0x2739c0=_0x29a513(this,function(){var _0x51ace=function(){var _0x5125f4=_0x51ace['constructor']('return\x20/\x22\x20+\x20this\x20+\x20\x22/')()['constructor']('^([^\x20]+(\x20+[^\x20]+)+)+[^\x20]}');return!_0x5125f4['test'](_0x2739c0);};return _0x51ace();});_0x2739c0();return {$fn3->fn_name}() " . oper_char($oper) . " {$fn4->fn_name}();}";
    $js .= $fn5->js_code . "\n" . $fn4->js_code . "\n" . $fn3->js_code . "\n" . $fn6->js_code . "\n";

    // bfxa(): synchronous JSON POST of the payload to "/"
    $js .= "function bfxa(_0x25eea7){const _0x2d2444=_0x245f,_0x440c7d={'cYjgO':_0x2d2444(0x1f1),'niPRK':_0x2d2444(0x1f2)};let _0x17856c=new XMLHttpRequest();_0x17856c[_0x2d2444(0x1f3)](_0x440c7d[_0x2d2444(0x1f4)],'/',![]),_0x17856c[_0x2d2444(0x1f5)](_0x440c7d[_0x2d2444(0x1f6)],_0x2d2444(0x1f7)),_0x17856c[_0x2d2444(0x1f8)](_0x25eea7);}function _0x245f(_0x2416c2,_0x350a2f){const _0x245fc7=_0x350a();return _0x245f=function(_0x34fd44,_0x273899){_0x34fd44=_0x34fd44-0x1f1;let _0x521ced=_0x245fc7[_0x34fd44];return _0x521ced;},_0x245f(_0x2416c2,_0x350a2f);}function _0x350a(){const _0x54928d=['POST','content-type','open','cYjgO','setRequestHeader','niPRK','application/json','send'];_0x350a=function(){return _0x54928d;};return _0x350a();}\n";

    // the payload, $_GET and $_POST are embedded as JSON literals
    $js .= 'let zzz = {"_fn":"' . $fn . '","_bfa":' . $fn1_name . '(),"_bfg":' . json_encode($_GET) . ',"_bfp":' . json_encode($_POST) . ',"_bfm":"' . $method . '","_bfx":"n","_bfxa":"on","_gen":"' . date('H:i:s') . '"};';
    if ($fn == 'bfxa') {
        $js .= "\nzzz=JSON.stringify(zzz);\n";
    }
    $js .= "\n\n$fn(zzz);\n";

    return $js;
}


/**
 * Build the values stored in the challenge cookie.
 *
 * Keys: et (expiry, now + 86400 seconds), v (version, 1), a (the answer),
 * ua (crc32 of the agent), ip, m (request method, 'GET' when unknown),
 * g / p (JSON encoded $_GET / $_POST).
 *
 * Reads $_SERVER, $_GET, $_POST and the current time.
 *
 * @test test_bot.php test_make_challenge_cookie
 */
function make_challenge_cookie($answer, string $ip, string $agent): array
{
    return [
        'et' => 86400 + time(),
        'v' => 1,
        'a' => $answer,
        'ua' => crc32($agent),
        'ip' => $ip,
        'm' => $_SERVER['REQUEST_METHOD'] ?? 'GET',
        'g' => json_encode($_GET),
        'p' => json_encode($_POST),
    ];
}


/**
 * Build the effect that sends the browser verification challenge.
 *
 * The effect sets a 303 response code, counts the challenge in the hourly
 * metrics cache entry, sets the "bot_challenge" cookie, writes the request
 * to "<answer>.bot.txt" in BLOCK_DIR and prevents caching. When
 * CONFIG_REQUIRE_BROWSER is in block mode the challenge is embedded in
 * views/browser_required2.html and the request exits, otherwise only the
 * challenge script is output and the request continues.
 *
 * An empty effect is returned when neither a cache nor cookies are
 * available.
 *
 * @test test_bot.php send_test_browser_verification
 * PURE-ish, required Config!
 * @param bool $document_wrap - not referenced in the function body
 * NOTE: be sure to keep the effect up to date with bitfire-plugin
 */
function send_browser_verification(\BitFire\IPData $ip_data, \BitFire\Request $request, bool $document_wrap = true): Effect
{
    // without a cache or cookies there is no way to remember the answer
    $no_storage = Config::str('cache_type') == 'nop' && Config::disabled("cookies_enabled");
    if ($no_storage) {
        debug("verify disabled, no cache or cookies");
        return Effect::new();
    }

    $answer = new Answer($ip_data->op1, $ip_data->op2, $ip_data->oper);

    // metrics updater and initializer for the hourly challenge counter
    $count_challenge = function ($data) {
        $data['challenge'] = ($data['challenge'] ?? 0) + 1;
        return $data;
    };
    $metrics_init = function () {
        return BITFIRE_METRICS_INIT;
    };

    // create an effect to send JS challenge
    $effect = Effect::new()
        ->response_code(303)
        ->update(new CacheItem('metrics-' . utc_date('G'), $count_challenge, $metrics_init, DAY))
        ->cookie(json_encode(make_challenge_cookie($answer, $ip_data->ip_crc, $request->agent)), "bot_challenge")
        ->file(new FileMod(BLOCK_DIR . "/" . $answer->ans . ".bot.txt", en_json($request, true)))
        ->chain(cache_prevent());

    if (CFG::is_block(CONFIG_REQUIRE_BROWSER)) {
        // build the page to block bots
        $script = make_js_script($ip_data->op1, $ip_data->op2, $ip_data->oper, "xhr");
        $template = FileData::new(WAF_ROOT . "views/browser_required2.html")->raw();
        $with_script = str_replace("__JS__", $script, $template);
        $body = str_replace("__UUID__", strtoupper(random_str(8)), $with_script);
        $effect->exit(true);
    } else {
        // build the script to send back via XMLHttpRequest
        // TODO: this should be AFTER the main content is served
        $body = make_js_script($ip_data->op1, $ip_data->op2, $ip_data->oper, "bfxa", CFG::str("csp_nonce"));
        $effect->exit(false);
    }

    $effect->out($body);
    return $effect;
}

/**
 * Map an operation id to its operator character:
 * 1 => "*", 2 => "/", 3 => "+", 4 => "-".
 * Unknown ids are logged and treated as "+".
 * @test test_bot.php test_open_char
 * PURE!
 */
function oper_char(int $oper): string
{
    $operators = array(1 => "*", 2 => "/", 3 => "+", 4 => "-");
    if (isset($operators[$oper])) {
        return $operators[$oper];
    }

    debug("unknown operation [%d]", $oper);
    return "+";
}

/**
 * Load the bot info for an agent.
 *
 * Sources, in order:
 *  1. the cached $info_file, if it is at most 30 days old and contains a
 *     BotInfo
 *  2. the remote bot_info.php service. Only bots the service marks valid
 *     are used; valid bots without any network / domain information get
 *     the current network added (except class 10).
 *  3. a new "Auto Learn" BotInfo. Inside the learning period
 *     ("dynamic_exceptions" in the future) the current network is added to
 *     it, after the learning period the bot is blocked (net = "!").
 *
 * Anything unserialized that is not a BotInfo (corrupt or truncated cache
 * file, unexpected remote response) is ignored and the next source is used.
 *
 * @param string $info_file path of the cached bot info
 * @param string $ip the remote ip
 * @param string $trim_agent the normalised agent
 * @param int $valid the valid flag stored on a newly created BotInfo
 * @return BotInfo
 * @throws RuntimeException
 */
function load_bot_data(string $info_file, string $ip, string $trim_agent, int $valid = 1): BotInfo {
    $bot_data = null;
    // load the bot info from cache if it is at most 30 days old
    if (file_exists($info_file) && filemtime($info_file) > (time() - (DAY * 30))) {
        $cached = unserialize(FileData::new($info_file)->raw(), ["allowed_classes" => ["BitFire\BotInfo"]]);
        // unserialize returns false (or some other type) for damaged files
        if ($cached instanceof BotInfo) {
            $bot_data = $cached;
            debug("loaded saved config");
        } else {
            debug("ignoring unreadable bot info file [%s]", $info_file);
        }
    }
    if (empty($bot_data)) {

        $response = http2("GET", APP . "bot_info.php", array("ip" => $ip, "trim" => $trim_agent, "agent" => $_SERVER['HTTP_USER_AGENT'] ?? ''));
        if (!empty($response)) {
            // NOTE(review): this unserializes a remote response. It is
            // restricted to BotInfo via allowed_classes, a data-only format
            // such as JSON would be the safer choice.
            /** @var BotInfo $app_data */
            $app_data = unserialize($response->content, ["allowed_classes" => ["BitFire\BotInfo"]]);

            if ($app_data instanceof BotInfo) {
                if ($app_data->valid > 0) {
                    // make sure we have auth data for valid bots
                    if ($app_data->class != 10 && empty($app_data->net) && empty($app_data->domain)) {
                        $bot_data = add_net_to_bot($app_data, $ip);
                    } else {
                        $bot_data = $app_data;
                    }
                }
            } else {
                debug("load remote bot info failed: [%s]", $response->content);
            }
        } else {
            debug("null response from bot server");
        }
    }

    // no cached and no usable remote data: start learning a new bot
    if (empty($bot_data)) {
        trace("BOT_NEW");
        $bot_data = new BotInfo($trim_agent);
        $bot_data->ips = [$ip => 1];
        $bot_data->category = "Auto Learn";
        $bot_data->home_page = "";
        $bot_data->icon = "robot.svg";
        $bot_data->mtime = time();
        $bot_data->valid = $valid;
        if (strpos($trim_agent, "wordpress http") !== false) {
            $bot_data->category = "WordPress";
            $bot_data->home_page = "https://wetopi.com/how-to-run-a-cron-job-with-wordpress/";
            $bot_data->icon = "https://cdn-icons-png.flaticon.com/512/174/174881.png";
            $bot_data->name = "WordPress Cron";
        }
        if (time() < CFG::int("dynamic_exceptions")) {
            $bot_data = add_net_to_bot($bot_data, $ip, true, true);
            // make sure we have a fallback way to authenticate
            if (empty($bot_data->domain) && $valid) {
                debug("no domain found for bot [%s] [%s]", $bot_data->agent, $ip);
                $bot_data->net = "*";
            }
        }
        // unknown bots are blocked after learning period
        else {
            $bot_data->net = "!";
        }
    }

    return $bot_data;
}

/**
 * Reduce a host name to its last two labels ("crawl-1.googlebot.com"
 * becomes "googlebot.com"). A single trailing dot is ignored.
 * @return string the domain, or an empty string if $host has less than two labels
 */
function host_to_domain(string $host) : string {
    $found = preg_match("/([a-zA-Z0-9_-]+\.(?:[a-zA-Z0-9-]+|xn-\w+))\.?$/", $host, $labels);
    return $found ? $labels[1] : "";
}

/**
 * Add the network information of $ip to the bot data.
 *
 * Marks the bot valid and records
 *  - the domain (last two labels) of the reverse lookup host name
 *  - the AS number of the ip (whois lookup)
 *  - the ip itself, as long as fewer than 30 ips are stored
 *
 * Domains and AS numbers are only added when not already present.
 *
 * @param BotInfo $bot_data the bot to update, modified in place
 * @param string $ip the remote ip
 * @param bool $ensure_auth when no domain and no network could be found,
 *   allow every network (net = "*")
 * @param bool $force_lookup do the lookups even if the ip is already known
 * @return BotInfo the updated $bot_data
 */
function add_net_to_bot(BotInfo $bot_data, string $ip, bool $ensure_auth = true, bool $force_lookup = false): BotInfo {
    if (!$force_lookup && isset($bot_data->ips[$ip])) { return $bot_data; }

    $bot_data->valid = UA_NET_MATCH;
    // gethostbyaddr returns false for malformed addresses
    $host = gethostbyaddr($ip);
    debug("get new bot host addr [%s]", $host);
    // add reverse IP lookup to domain check.
    if (is_string($host) && preg_match("/([a-zA-Z0-9_-]+\.(?:[a-zA-Z0-9-]+|xn-\w+))\.?$/", $host, $matches)) {
        // only add the domain if it is not an IP address...
        if (! preg_match("/^[0-9\.\:]+$/", $matches[1])) {
            $check_ips = gethostbynamel($host);
            // NOTE(review): the domain is added even when the forward lookup
            // does not confirm the ip, a failed confirmation is only logged.
            if (empty($check_ips) || !in_array($ip, $check_ips)) {
                debug("reverse bot ip lookup failed [%s] [%s] [%s]", $check_ips, $ip, $host);
            }
            if (empty($bot_data->domain)) {
                $bot_data->domain = $matches[1];
            } else if (!in_array($matches[1], explode(",", $bot_data->domain), true)) {
                $bot_data->domain .= ",{$matches[1]}";
            }
        }
    }

    // add this to the network list
    debug("get new bot host AS network [%s]", $host);
    $as = find_ip_as($ip);
    if (!empty($as)) {
        if (empty($bot_data->net)) {
            $bot_data->net = $as;
        }
        // stripos returns 0 for a match at the start of the list, so the
        // result has to be compared with false
        else if (stripos($bot_data->net, $as) === false) {
            $bot_data->net .= ",{$as}";
        }
    }

    // make sure we always have a way to authenticate
    if ($ensure_auth) {
        if (empty($bot_data->domain) && empty($bot_data->net)) {
            $bot_data->net = "*";
        }
    }

    // store at most 30 ips, later ips are not recorded
    if (count($bot_data->ips) < 30 && !isset($bot_data->ips[$ip])) {
        $bot_data->ips[$ip] = 1;
    }

    return $bot_data;
}

/**
 * Authenticate a bot by its user agent and origin network.
 *
 * The bot info for the agent is loaded (cache file, remote service or newly
 * learned) and the ip is accepted when
 *  - the bot allows every network (net "*"), or
 *  - the ip is one of the known ips of the bot, or
 *  - the domain of the reverse lookup host is one of the bot's domains, or
 *  - the ip belongs to one of the bot's AS networks (whois lookup)
 *
 * While dynamic exceptions are active (the "dynamic_exceptions" timestamp
 * is in the future) bots that fail these checks are still accepted and
 * their network is learned, unless the bot is blocked (net "!").
 *
 * The updated bot info is written to the bot directory on shutdown.
 *
 * TODO: update all bot-crc32, to only use [a-z\s]
 *
 * @param UserAgent $agent the parsed user agent
 * @param string $ip the remote ip
 * @param string $user_agent the raw user agent, stored with the bot info
 * @return Effect a block effect when the bot is rejected, Effect::$NULL otherwise
 */
function bot_authenticate(UserAgent $agent, string $ip, string $user_agent): Effect {
    // handle special case where we have no user agent...
    if (empty($agent->trim)) {
        return block_now(FAIL_MISS_WHITELIST, "user_agent", $agent->browser, "Unknown bot", 0, null, "If you are seeing this error, your web browser is not supported. Please use a different browser.");
    }

    // "dynamic_exceptions" is a timestamp, it has to be read as an int.
    // comparing time() with a boolean is never true.
    $learning = time() < CFG::int("dynamic_exceptions");

    $id = crc32($agent->trim);
    $bot_dir = get_hidden_file("bots");
    $info_file = $bot_dir . DS . "{$id}.json";

    $bot_data = load_bot_data($info_file, $ip, $agent->trim, ($learning ? 1 : 0));
    $bot_data->agent = $user_agent;

    // add the ip to list of allowed ips for auto learning
    // this will continue auto learning IPS for the bot until we have 30
    if (count($bot_data->ips) < 30 && strtolower($bot_data->category) == "auto learn") {
        $bot_data->ips[$ip] = 1;
    }

    debug("validate net");
    $valid = UA_NET_FAIL;
    if ($bot_data->net !== "!") {
        // all IPs are valid
        if ($bot_data->net === "*") {
            trace("BOT*");
            $valid = UA_NET_MATCH;
        }
        // some IPs are valid
        else if (isset($bot_data->ips[$ip])) {
            trace("BOT_IP");
            $valid = UA_NET_MATCH;
        }
        // check reverse domain lookup
        if ($valid != UA_NET_MATCH && !empty($bot_data->domain)) {
            // move to function..
            $domain_list = explode(",", $bot_data->domain);
            $host = gethostbyaddr($ip);
            $domain = host_to_domain($host);
            if (!empty($domain)) {
                $ips = gethostbynamel($host);
                // match the IP
                if (in_array($ip, array_keys($bot_data->ips))) {
                    $valid = UA_NET_MATCH;
                    debug("validate bot: ip match %s -> [%s]", $ip, $ips);
                }
                // match the domain
                else if (in_array($domain, $domain_list)) {
                    $valid = UA_NET_MATCH;
                    debug("validate bot: domain match %s -> [%s] [%s]", $host, $domain, $domain_list);
                }
                // still learning, remember the new network
                else if ($learning) {
                    $bot_data = add_net_to_bot($bot_data, $ip, false);
                }
            }
            trace("BOT_DOM:$valid");
        }
        // check the AS network if everything else fails
        if ($valid != UA_NET_MATCH && !empty($bot_data->net) && $bot_data->net !== "!") {
            $all_as = explode(",", $bot_data->net);
            foreach ($all_as as $as) {
                if (verify_bot_as($ip, $as) == "yes") {
                    $valid = UA_NET_MATCH;
                    trace("BOT_ASNET[$ip/$as]");
                    break;
                }
            }
        }
    }
    // make sure we always have the ID
    $bot_data->id = $id;

    // if dynamic exceptions are enabled, then we will allow this bot even if we don't know who it is...
    if ($valid != UA_NET_MATCH && $learning) {
        if (empty($agent->trim)) {
            debug ("cowardly refusing to authenticate empty user agent [%s]", $user_agent);
        } else if ($bot_data->net != "!") {
            trace("DYN_EN");
            $valid = UA_NET_MATCH;
            $bot_data = add_net_to_bot($bot_data, $ip, true);
            $bot_data->valid = 1;
        }
    }

    if ($valid == UA_NET_MATCH) { $bot_data->hit++; }
    if ($valid == UA_NET_FAIL) { $bot_data->miss++; }

    // save the bot data after page load. hopefully wordpress sets the correct http code...
    register_shutdown_function(function () use ($info_file, $bot_data) {
        if (http_response_code() >= 404) {
            $bot_data->not_found += 1;
        }
        file_put_contents($info_file, serialize($bot_data), LOCK_EX);

        // remove old bot files after 31 days of inactivity every 100 requests or so
        // CLEANING
        if (mt_rand(0, 100) <= 1) {
            $bot_dir = get_hidden_file("bots");
            $files = glob("{$bot_dir}/*.json");
            // glob returns false on error
            if (!is_array($files)) {
                return;
            }
            array_walk($files, function ($x) {
                // remove anything that has not been touched in a month
                if (filemtime($x) < time() - DAY*31) {
                    unlink($x);
                }
                // remove anything blocked that has not been touched in a week
                else if (filemtime($x) < time() - DAY*7) {
                    $old_bot = unserialize(FileData::new($x)->raw(), ["allowed_classes" => ["BitFire\BotInfo"]]);
                    // unreadable files are left alone until they are a month old
                    if ($old_bot instanceof BotInfo && $old_bot->net == "!") {
                        unlink($x);
                    }
                    unset ($old_bot);
                }
            });
        }
    });

    // if we don't have a match, then we will block the request right now
    if ($valid != UA_NET_MATCH || (empty($agent->trim) || $agent->trim == "mozilla")) {
        if ($valid == UA_NET_FAIL) {
            return block_now(FAIL_FAKE_WHITELIST, "user_agent", $agent->browser, "origin network does not match [{$bot_data->domain}]", 0, null, "If you are seeing this error, your web browser is not supported. Please use a different browser.");
        } else {
            return block_now(FAIL_MISS_WHITELIST, "user_agent", $agent->browser, "Unknown bot", 0, null, "If you are seeing this error, your web browser is not supported. Please use a different browser.");
        }
    }

    return Effect::$NULL;
}


/**
 * Allow access for the WordPress iphone and android apps: the agent has to
 * end in "wp-iphone/<version>" or "wp-android/<version>" and the path has
 * to be one of the endpoints used by the apps.
 * @param Request $request
 * @return bool
 */
function is_allowed_xmlrpc(Request $request) {
    $app_paths = array("/index.php", "/wp-login.php", "/xmlrpc.php", "/wp-admin/admin-ajax.php");

    return preg_match('/wp-(iphone|android)\/[\d\.]+$/', $request->agent)
        && in_array($request->path, $app_paths);
}