firewall/src/cms.php
<?php
/**
* BitFire PHP based Firewall.
* Author: BitFire (BitSlip6 company)
* Distributed under the AGPL license: https://www.gnu.org/licenses/agpl-3.0.en.html
* Please report issues to: https://github.com/bitslip6/bitfire/issues
*
* all functions are called via api_call() from bitfire.php and all authentication
* is done there before calling any of these methods.
*/
namespace BitFire;
use ThreadFin\CacheItem;
use ThreadFin\CacheStorage;
use \BitFire\Config as CFG;
use BitFireSvr\FileHash;
use Exception;
use FineDiff;
use OutOfBoundsException;
use RuntimeException;
use ThreadFin\Effect;
use ThreadFin\FileData;
use ThreadFin\FileMod;
use ThreadFin\Pair;
use const BitFire\APP;
use const BitFire\FILE_W;
use const ThreadFin\DAY;
use function BitFirePlugin\check_user_cap;
use function BitFirePlugin\version_from_path;
use function BitFireSvr\cms_root;
use function BitFireSvr\hash_file3;
use function BitFireSvr\parse_scan_config;
use function ThreadFin\compress;
use function ThreadFin\contains;
use function ThreadFin\dbg;
use function ThreadFin\ends_with;
use function ThreadFin\find_const_arr;
use function ThreadFin\find_fn;
use function ThreadFin\id_fn;
use function ThreadFin\debug;
use function ThreadFin\en_json;
use function ThreadFin\file_yield;
use function ThreadFin\get_hidden_file;
use function ThreadFin\HTTP\http2;
use function ThreadFin\HTTP\http3;
use function ThreadFin\icontains;
use function ThreadFin\index_yield;
use function ThreadFin\machine_date;
use function ThreadFin\trace;
use function ThreadFin\un_json;
use function ThreadFin\random_str;
use function ThreadFin\uncompress;
// File-name / directory-name lists. They are not referenced in the visible part of this file;
// presumably used by the plugin enumeration / scanning code elsewhere - TODO confirm.
const ENUMERATION_FILES = ["readme.txt", "license.txt"];
const PLUGIN_DIRS = ["/plugins/", "/themes/"];
const PACKAGE_FILES = ["readme.txt", "README.txt", "package.json"];
const RISKY_JS = ["fromCharCode"];
// Regex alternation fragments (each starts with "|"). cms_find_malware_str() concatenates
// a selection of them and substitutes the result for the %s placeholder in FN1_RX.
const WP_FN = "|wp_create_user";
const UPLOAD_FN = "|move_uploaded_file";
// matches a PHP variable name, i.e. a dynamic call such as $fn(...)
const VAR_FN = '|\$[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*';
const USER_FN = "|call_user_func|call_user_func_array|create_function";
// sprintf() template for the "risky function call" regex: a delimiter character, one of the
// listed function names (plus the %s extras), an optional [index], an optional comment, then "(".
const FN1_RX = '/[\@\s\(\);\/](?:header|mail|uudecode|hebrev|hex2bin|str_rot13|eval|proc_open|pcntl_exec|exec|shell_exec|system|passthru%s*)\s*(?:\[[^\]]*?\])?\s*(?:(?:#[^\n]*\n)|(?:\/\/[^\n]*\n)|(?:\/\*.*?\*\/))?\(\s*(?:[\.\$_]*)?/misS';
// Byte values used as indexes into count_chars() results (see get_plugin_file_info()).
const CHAR_NL = 10;
// NOTE(review): byte 61 is "=" and byte 73 is "I"; "#" is 35 and "/" is 47.
// The names suggest the latter pair was intended - confirm before relying on these.
const CHAR_HASH = 61;
const CHAR_SLASH = 73;
// Request-profile defaults and limits; not referenced in the visible part of this file.
const PROFILE_INIT = ["^a" => 0, "^u" => 0, "^g" => 0, "^p" => 0, "^c" => []];
const PROFILE_MAX_PARAM = 30;
const PROFILE_MAX_VARS = 20;
const PROFILE_MAX_CAPS = 20;
// Load the CMS integration helpers exactly once: nothing is included when
// \BitFirePlugin\file_type() already exists.
if (!function_exists("\BitFirePlugin\\file_type")) {
    $standalone_wp_include = \BitFire\WAF_ROOT . "wordpress-plugin" . DS . "includes.php";
    $standalone_custom_include = \BitFire\WAF_ROOT . "custom-plugin" . DS . "includes.php";

    // 1. bundled WordPress helpers, used only when a wp-load.php exists two directories above this one
    //    (the second, nested file_exists() check of the same path was redundant and has been removed)
    if (file_exists($standalone_wp_include) && file_exists(dirname(__DIR__, 2) . DS . "wp-load.php")) {
        trace("wp_alone");
        require_once $standalone_wp_include;
    }
    // 2. an includes.php located directly in the firewall root
    else if (file_exists(\BitFire\WAF_ROOT . "includes.php")) {
        trace("wp_root");
        include_once \BitFire\WAF_ROOT . "includes.php";
    }
    // 3. fall back to the custom plugin; both includes are best-effort (warnings suppressed)
    else {
        trace("custom");
        $standalone_custom_plugin = \BitFire\WAF_ROOT . "custom-plugin" . DS . "bitfire-plugin.php";
        @include_once $standalone_custom_include;
        @include_once $standalone_custom_plugin;
    }
}
/**
 * Options controlling a malware scan. scan_filesystem() builds one via
 * parse_scan_config() when the caller does not supply it.
 * Only the options read in the visible part of this file are described; the
 * remaining ones are presumably consumed elsewhere - TODO confirm.
 */
class ScanConfig {
// when true, cms_find_malware_str() adds WP_FN to the function-call regex
public $wp_func = true;
public $includes = true;
public $access_time = true;
// when true, scan_filesystem() reports unknown root / core files
public $unknown_core = true;
// only referenced from commented-out code in scan_filesystem()
public $unknown_plugin = false;
// only used in scan_filesystem() to build a regex that is never read there
public $non_php = false;
// when false, scan_filesystem() skips files already recorded as passing (is_pass_hash())
public $standard_scan = false;
// only referenced from commented-out code in cms_find_malware_str()
public $freq_limit = 512;
// longest allowed line; values <= 0 disable the long-line check in cms_find_malware_str()
public $line_limit = 2048;
public $random_name_per = 50;
// thresholds above which cms_find_malware_str() also searches for dynamic function calls
public $fn_freq_limit = 20;
public $fn_line_limit = 768;
public $fn_random_name_per = 30;
public $quick = true;
public $extra_regex = true;
}
/**
 * Empty root class for the entity classes in this file.
 * Malware extends it and Malware_List::add() accepts it as its parameter type.
 * @package BitFire
 */
class Entity
{
}
/**
 * Base class for typed lists: wraps a plain array and exposes it through
 * ArrayAccess, Iterator, Countable and JsonSerializable.
 * Concrete subclasses implement offsetGet() and current() with their own
 * element return type.
 * @package BitFire
 */
abstract class Typed_List implements \ArrayAccess, \Iterator, \Countable, \JsonSerializable {
    /** @var int $_position current iterator position */
    protected $_position = 0;
    /** @var array $_list the wrapped items */
    public $_list = [];
    /** @var bool $associated true once an item has been stored under an explicit index */
    public $associated = false;

    // Countable impl. return the number of items in the list
    public function count(): int
    {
        return count($this->_list);
    }

    /**
     * move the iterator to a specific position
     * @throws OutOfBoundsException when no item exists at $position
     */
    public function seek($position)
    {
        if (!isset($this->_list[$position])) {
            throw new OutOfBoundsException("invalid seek position ($position)");
        }
        $this->_position = $position;
    }

    // Iterator impl. reset the list position to the first element
    public function rewind(): void
    {
        $this->_position = 0;
    }

    // Iterator impl. return the current index
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->_position;
    }

    // Iterator impl. move to the next element
    public function next(): void
    {
        ++$this->_position;
    }

    // Iterator impl. check if the current position is valid
    public function valid(): bool
    {
        return isset($this->_list[$this->_position]);
    }

    /**
     * ArrayAccess impl. set the value at a specific index.
     * NOTE(review): every "empty" index (null, 0, "0", "") appends instead of
     * assigning, so $list[0] = $x never overwrites element 0 - confirm this is intended.
     */
    public function offsetSet($index, $value): void
    {
        if (empty($index)) {
            $this->_list[] = $value;
        } else {
            $this->_list[$index] = $value;
            $this->associated = true;
        }
    }

    // ArrayAccess impl. remove(unset) the value at a specific index
    public function offsetUnset($index): void
    {
        unset($this->_list[$index]);
    }

    // ArrayAccess impl. check if the value at a specific index exists
    public function offsetExists($index): bool
    {
        return isset($this->_list[$index]);
    }

    // Sort the list by key values
    public function ksort(int $flags = SORT_REGULAR): bool
    {
        return ksort($this->_list, $flags);
    }

    // the list is its own iterator
    public function getIterator(): \Traversable
    {
        return $this;
    }

    /**
     * This method allows us to call json_encode() and not have a "_list" sub-object
     * @return array the list data
     */
    public function jsonSerialize(): array
    {
        return $this->_list;
    }

    // helper method, true when the list holds no items
    public function empty(): bool
    {
        return empty($this->_list);
    }

    /**
     * append a list to this list.
     * Keyed lists ($associated) are combined with array_merge(), plain lists are
     * appended item by item. The previous debug dump of every appended list to
     * /tmp/list_append.txt has been removed.
     * @param Typed_List $list
     * @return void
     */
    public function append_list(Typed_List $list) {
        if (count($list) < 1) {
            return;
        }
        if ($this->associated) {
            $this->_list = array_merge($this->_list, $list->raw());
            return;
        }
        foreach ($list->_list as $item) {
            $this->_list[] = $item;
        }
    }

    // return a reference to the wrapped array
    public function &raw() { return $this->_list; }

    // ArrayAccess impl. subclasses narrow the return type
    #[\ReturnTypeWillChange]
    public abstract function offsetGet($index);

    // Iterator impl. return the element at $this->_position.
    // override the return type!
    #[\ReturnTypeWillChange]
    public abstract function current();
}
/**
 * A single malware finding: the suspicious content, the text around it and
 * the metrics of the file it was found in.
 *
 * @package BitFire
 */
class Malware extends Entity
{
    /** @var float $frequency character frequency score of the file */
    public $frequency = 0.0;
    /** @var int $php_count */
    public $php_count = 0;
    /** @var int $location 0 - beginning, 1 - middle, 2 - end */
    public $location = 0;
    /** @var string $pre_text text preceding the finding */
    public $pre_text = "";
    /** @var int $content_offset */
    public $content_offset = 0;
    /** @var string $content the suspicious content */
    public $content = "";
    /** @var string $post_text text following the finding */
    public $post_text = "";
    /** @var string $note human readable description of the finding */
    public $note = "";
    /** @var int $pre_indent packed indentation of $pre_text (was "" by default, now 0 like its siblings) */
    public $pre_indent = 0;
    /** @var int $content_indent packed indentation of $content */
    public $content_indent = 0;
    /** @var int $post_indent packed indentation of $post_text */
    public $post_indent = 0;
    /** @var int $file_size */
    public $file_size = 0;
    /** @var File_Info_Block $info */
    public $info;
    /** @var int $max_length length of the longest line */
    public $max_length = 0;
    /** @var int $per_unknown percentage of unrecognized variable-name parts */
    public $per_unknown = 0;
    /** @var string $path */
    public $path;
    /** @var string $unique random identifier assigned in the constructor */
    public $unique;
    /** @var bool $known */
    public $known = false;
    /** @var int|false $ctime result of filectime() for $path */
    public $ctime;
    public $url;

    public function __construct() {
        $this->unique = random_str(8);
    }
}
/**
 * a typed list of Malware, plus scan progress counters
 */
class Malware_List extends Typed_List
{
    /** @var int $num_scanned number of files looked at */
    public $num_scanned = 0;
    /** @var int $num_skipped number of files skipped */
    public $num_skipped = 0;
    /** @var int $complete set to 1 by scan_filesystem() when the index is exhausted */
    public $complete = 0;
    public $_list = [];

    /**
     * add a new malware item to the list, null is silently ignored
     * @param null|Entity $malware must be a Malware instance (checked by assert)
     * @return void
     */
    public function add(?Entity $malware) {
        if ($malware === null) {
            return;
        }
        assert($malware instanceof Malware, "Malware_List can only contain Malware objects");
        debug("CREATE MALWARE %s", $malware);
        $this->_list[] = $malware;
    }

    /**
     * @return Malware|null the item at $index, or null when there is none
     */
    public function offsetGet($index) : ?Malware {
        return $this->_list[$index] ?? null;
    }

    /**
     * @return Malware|null the item at the iterator position, or null when the
     *   position is invalid (previously this read an undefined offset)
     */
    public function current() : ?Malware {
        return $this->_list[$this->_position] ?? null;
    }

    public function inc_scanned() {
        $this->num_scanned++;
    }

    public function inc_skipped() {
        $this->num_skipped++;
    }

    public function set_scanned(int $num) {
        $this->num_scanned = $num;
    }
}
/**
 * metrics for one chunk of a file, filled in by get_plugin_file_info()
 * @package BitFire
 */
class File_Info_Block
{
/** @var float $frequency score returned by char_freq_analysis() for the chunk */
public $frequency;
/** @var float $slash_freq */
public $slash_freq;
/** @var float $hash_freq */
public $hash_freq;
/** @var float $block_freq not assigned in the visible part of this file */
public $block_freq;
/** @var int $indent_level packed value from get_line_indents(): low 15 bits spaces, tabs shifted left by 15 */
public $indent_level;
/** @var int $lines number of lines in this file info block */
public $lines;
}
/**
 * placeholder: currently reports no malware for any file
 * @param string $file
 * @return null|Malware always null
 */
function find_malware(string $file) : ?Malware {
    return null;
}
/**
 * get the name of the pass-hash bucket ("quick_map" file) a file belongs to.
 * Files below the configured cms_content_dir are grouped by their first two
 * path components relative to that directory, all other files fall into
 * "content-root" (path contains "wp-content") or "cms-root".
 *
 * The two leading components are now taken directly from the split path. The
 * previous implementation removed the rest of the path with str_replace(),
 * which also removed any earlier occurrence of that text from the bucket name.
 *
 * @param string $file_path absolute path of the file
 * @param string $rel_path unused, kept for interface compatibility
 * @return string directory separators replaced by "_", no leading/trailing "_"
 */
function get_hash_path(string $file_path, string $rel_path) : string {
    $content_dir = CFG::str("cms_content_dir");
    if (contains($file_path, $content_dir)) {
        $strip_root = str_replace($content_dir, "", $file_path);
        $parts = explode(DS, trim($strip_root, DS));
        // e.g. plugins/<plugin-name> or themes/<theme-name>
        $base_root = join(DS, array_slice($parts, 0, 2));
    } else {
        $base_root = (contains($file_path, "wp-content")) ? "content-root" : "cms-root";
    }
    return trim(str_replace(DS, "_", $base_root), "_");
}
/**
 * add the hash to the list of passing hashes.
 * Entries are collected in memory and written to the per-bucket
 * quick_map/<hash_path>.json files by a shutdown function that is registered
 * on the first call.
 * @param string $hash_path bucket name, see get_hash_path()
 * @param int $crc_path
 * @param int $crc_trim
 * @return void
 */
function add_to_pass_hash(string $hash_path, int $crc_path, int $crc_trim) {
    static $list = null;
    // strict check: the loose "== null" also matched an empty array
    if ($list === null) {
        $list = [];
        register_shutdown_function(function() use (&$list) {
            foreach ($list as $base_root => $file_list) {
                $file_name = get_hidden_file("quick_map".DS.$base_root.".json");
                $data = [];
                if (file_exists($file_name)) {
                    $decoded = json_decode(file_get_contents($file_name), true);
                    // a damaged or non-array file is replaced instead of crashing the shutdown handler
                    if (is_array($decoded)) {
                        $data = $decoded;
                    }
                } else {
                    // best effort, the directory may already exist
                    @mkdir(get_hidden_file("quick_map"));
                }
                foreach ($file_list as $path_crc => $trim_crc) {
                    $data[$path_crc] = $trim_crc;
                }
                file_put_contents($file_name, json_encode($data));
            }
        });
    }
    if (!isset($list[$hash_path])) {
        $list[$hash_path] = [];
    }
    $list[$hash_path][$crc_path] = $crc_trim;
}
/**
 * check whether a file hash was previously recorded as passing.
 * The quick_map file of the hash's bucket is decoded on first use and kept in
 * a static per-request cache.
 * @param FileHash $hash
 * @return bool true when the recorded crc_trim for the hash's crc_path equals the current one
 * @throws Exception
 */
function is_pass_hash(FileHash $hash) : bool {
    static $map = [];
    $bucket = get_hash_path($hash->file_path, $hash->rel_path);
    if (!isset($map[$bucket])) {
        $file = get_hidden_file("quick_map".DS.$bucket.".json");
        $map[$bucket] = file_exists($file)
            ? json_decode(file_get_contents($file), true)
            : [];
    }
    // an unknown bucket or unknown path is never a pass
    return isset($map[$bucket][$hash->crc_path])
        && $map[$bucket][$hash->crc_path] == $hash->crc_trim;
}
/**
 * Scan the next batch of files listed in $index_file for malware.
 *
 * Steps, in order:
 *  1. hash each file from the index (hash_file3), skipping allow-listed files, files already
 *     recorded as passing and a few hard-coded known hashes
 *  2. POST the hashes of typed files to the hash_compare2.php service
 *  3. run batch_enrich() on the files the service found, collecting any reported malware
 *  4. inspect the remaining unknown files locally (malware_metrics / cms_find_malware)
 * Files without findings are recorded via add_to_pass_hash().
 *
 * TODO, scan plugins and themes, and pull down list of known plugins and themes
 * only send to hash_compare known files
 * @param string $index_file file listing the paths to scan, consumed through index_yield()
 * @param int $skip_files NOTE(review): not used in this function
 * @param int $max_files maximum number of files requested from index_yield()
 * @param null|ScanConfig $config scan options, loaded from the "malware_config" setting when NULL
 * @return Malware_List findings plus the scanned / skipped counters and the complete flag
 * @throws RuntimeException
 * @throws Exception
 */
function scan_filesystem(string $index_file, int $skip_files = 0, int $max_files = 120, ?ScanConfig $config = NULL) : Malware_List {
require_once WAF_SRC . "/server.php";
// NOTE(review): $counter, $plugins, $themes and (below) $reg_ex, $t0, $t1 are assigned but never read here
$counter = 0;
$list = new Malware_List();
$list->complete = 0;
$type_fn = "\BitFirePlugin\\file_type";
$plugins = "/wp-content/plugins/";
$themes = "/wp-content/themes/";
// $batch: hashed files sent to the compare service, $results: files the service found,
// $unknown: files that are inspected locally
$batch = [];
$results = [];
$unknown = [];
$allow_map = [];
if ($config == NULL) { $config = parse_scan_config(CFG::arr("malware_config")); }
// hard-coded crc_path => crc_trim pairs that are always skipped,
// a value of 1 accepts any crc_trim for that path (see the check in the loop below)
$passable = [
1579353588 => 2386763083,
2008795106 => 3433127016,
730207289 => 347445098,
0 => 2042742896,
2498048709 => 1,
2639749952 => 1,
1864550530 => 1,
3551137528 => 1,
311902961 => 1
];
// the manual allow list
$allowed = FileData::new(get_hidden_file("hashes.json"))->read()->un_json()->lines;
if ($allowed === null || empty($allowed)) { $allowed = [];}
foreach ($allowed as $file) { $allow_map[$file["path"]] = $file["trim"]; }
$ver_fn = '\BitFirePlugin\\version_from_path';
$reg_ex = (!$config->non_php) ? NULL : "/.*\.php/";
//foreach (index_yield($root_path, $reg_ex, $max_files, $skip_files) as $file) {
$root = cms_root();
foreach (index_yield($index_file, $max_files) as $file) {
// a null entry marks the scan as complete
if ($file == null) {
$list->complete = 1;
break;
}
$list->inc_scanned();
// allow-listed paths are skipped regardless of the stored "trim" value
if (isset($allow_map[$file])) {
$list->inc_skipped();
continue;
}
$file_hashed = hash_file3($file, $type_fn, $ver_fn, $root);
if (empty($file_hashed) || $file_hashed->skip) { $list->inc_skipped(); continue; }
// skip files that are not js or php, or do not have malware functions
//if ($file_hashed->skip) { file_put_contents("/tmp/skipped.txt", "$file\n", FILE_APPEND); }
// skip known good files
if ($config->standard_scan == false && is_pass_hash($file_hashed)) { $list->inc_skipped(); continue; }
// skip empty index files
if (in_array($file_hashed->crc_trim, [3574178858, 3551137528, 1162311920])) { continue; }
if (isset($passable[$file_hashed->crc_path])) {
if ($passable[$file_hashed->crc_path] == 1 || $passable[$file_hashed->crc_path] == $file_hashed->crc_trim) {
continue;
}
}
// files of unknown type are only checked locally, everything else goes to the compare service
if ($file_hashed->type == "unknown") {
$unknown[] = $file_hashed;
} else {
$batch[] = $file_hashed;
}
}
// an index file smaller than 256 bytes is also treated as a finished scan
if (filesize($index_file) < 256) { $list->complete = 1; }
$h2 = en_json(["ver" => 1.0, "files" => $batch]);
$compressed = compress($h2);
// NOTE(review): "ACCEPT-ENCODING" is a bare value without a key in the header array - confirm intended
$response = http2("POST", APP."hash_compare2.php", $compressed[0], array("Content-Type" => "application/json", "X-COMPRESSION" => $compressed[2], "ACCEPT-ENCODING"));
// NOTE(review): the response is not checked before it is decoded and iterated
$decoded = un_json($response->content);
$result = un_json(uncompress($decoded));
debug("hash_compare result [%d]", count($result));
foreach ($result as $item) {
// files the service reports as PASS are remembered and need no further checks
if (isset($item["r"]) && $item["r"] === "PASS") {
add_to_pass_hash(get_hash_path($item["file_path"], $item["rel_path"]), $item["crc_path"], $item["crc_trim"]);
continue;
}
// TODO: check if we found the actual file...
if ($item["found"]) {
$results[] = $item;
} else {
$unknown[] = $item;
}
}
// scan for malware in infected WordPress files
// the found files are enriched in slices of 20
for ($i=0; $i<count($results); $i+=20) {
$t0 = microtime(true);
$infected_files = batch_enrich(array_slice($results, $i, 20), $config);
$t1 = microtime(true);
foreach ($infected_files as $file) {
if (isset($file['malware']) && count($file['malware']) > 0) {
//xdebug_break();
$list->append_list($file['malware']);
} else {
add_to_pass_hash(get_hash_path($file["file_path"], $file["rel_path"]), $file["crc_path"], $file["crc_trim"]);
}
}
}
// $unknown holds both objects (from hash_file3) and arrays (from the compare service)
foreach ($unknown as $check_file) {
if (empty($check_file)) { continue; }
$miss = false;
$path = "";
$type = "core";
if (is_object($check_file)) {
$path = $check_file->file_path;
$type = $check_file->type;
if ($config->unknown_core && $check_file->name == "root" && $check_file->type == "unknown") {
$type = "ROOT";
$miss = true;
}
} else {
$path = $check_file["file_path"];
$type = $check_file["type"];
if ($config->unknown_core && ($check_file["type"] == "wp_core" || $check_file["table"] == "core") && $check_file["size2"] < 1) {
$type = "wp_core";
$miss = true;
}
if (intval($check_file["plugin_id"]??0) > 1 && $check_file["r"] == "MISS") {
/* XXX keep this? lots of false positives, but could help identify hard to find malware... TODO: make this an option
if ($config->unknown_plugin) {
$type = "MISS PLUGIN: " . $check_file["plugin_id"];
$miss = true;
}
*/
}
}
if ($miss) {
// unknown root / core file: report it together with the first 2048 bytes of its content
$content = file_get_contents($path, false, null, 0, 2048);
$m = malware_metrics($content, false, $path, $config);
$m->content = substr($content, 0, 2048);
$m->note = sprintf("unknown <%s> file", $type);
$list->add($m);
} else {
// NOTE(review): this property access is also evaluated when $check_file is an array, and the
// object branch below uses array syntax ($check_file["r"]) - confirm FileHash supports both
if (!empty($check_file->file_path)) {
$known = intval($check_file->plugin_id) > 0 && $check_file["r"] != "MISS";
$m2 = cms_find_malware($check_file->file_path, $known, 0, $config);
if (count($m2) > 0) {
$list->append_list($m2);
} else {
add_to_pass_hash(get_hash_path($check_file->file_path, $check_file->rel_path), $check_file->crc_path, $check_file->crc_trim);
}
} else {
$known = intval($check_file["plugin_id"]??0) > 0 && $check_file["r"] != "MISS";
$m2 = cms_find_malware($check_file["file_path"], $known, 0, $config);
if (count($m2) > 0) {
$list->append_list($m2);
} else {
add_to_pass_hash(get_hash_path($check_file["file_path"], $check_file["rel_path"]), $check_file["crc_path"], $check_file["crc_trim"]);
}
}
}
}
return $list;
}
/**
 * return array of pairs (file, ctime) for the files in $directory whose
 * change time (ctime) is shared by fewer files than the most common one.
 * Only regular files directly inside $directory are considered.
 * A missing or unreadable directory yields an empty array.
 * @param string $directory
 * @return array list of Pair(file path, ctime)
 */
function odd_access_times(string $directory) : array {
    if (!is_dir($directory) || !is_readable($directory)) {
        return [];
    }
    $dh = opendir($directory);
    if ($dh === false) {
        return [];
    }

    $access_count = [];
    $ctime_to_file = [];
    // stat each file and group the paths by their ctime
    try {
        while (($file = readdir($dh)) !== false) {
            if ($file == '.' || $file == '..') {
                continue;
            }
            $path = $directory . '/' . $file;
            if (!is_file($path)) {
                continue;
            }
            $stat = stat($path);
            // the file may have disappeared since readdir()
            if ($stat === false) {
                continue;
            }
            $access_count[$stat['ctime']] = ($access_count[$stat['ctime']] ?? 0) + 1;
            $ctime_to_file[$stat['ctime']][] = $path;
        }
    } finally {
        closedir($dh);
    }

    // remove the most common time: sort ascending by count and drop the last entry
    asort($access_count);
    array_pop($access_count);

    $files = [];
    foreach (array_keys($access_count) as $time) {
        foreach ($ctime_to_file[$time] as $path) {
            $files[] = $path;
        }
    }

    // map the files with their change time
    return array_map(function($file) {
        return new Pair($file, filectime($file));
    }, $files);
}
/**
 * pure function to compare per-line character counts against a frequency table.
 * Only the byte ranges 0-64, 91-96 and 123-126 are inspected (letters and
 * everything above 126 are ignored). For each byte whose per-line count exceeds
 * the table value "u" by more than a factor of 1.4, (ratio - 1) is added to the score.
 * @test test_malware/test_char_freq_analysis
 * @param array $test_frequency byte value => count, index 10 (newline) is the line count
 * @param array $compare_freq (byte value + 128) => ["u" => reference value]
 * @return float the score rounded to 2 decimals
 */
function char_freq_analysis(array $test_frequency, array $compare_freq): float
{
    $line_count = $test_frequency[10] ?? 1;
    $score = 0.0;
    // inclusive byte ranges, skipping the gaps keeps the loop short as this is called a lot
    $ranges = [[0, 64], [91, 96], [123, 126]];

    foreach ($ranges as [$first, $last]) {
        for ($byte = $first; $byte <= $last; $byte++) {
            if (!isset($test_frequency[$byte])) {
                continue;
            }
            $table_key = $byte + 128;
            $per_line = round(($test_frequency[$byte] / $line_count), 4);
            if (!isset($compare_freq[$table_key])) {
                continue;
            }
            $reference = $compare_freq[$table_key]["u"];
            if ($per_line > $reference) {
                $ratio = $per_line / $reference;
                if ($ratio > 1.4) {
                    $score += ($ratio - 1.0);
                }
            }
        }
    }

    return round($score, 2);
}
/**
 * compute a File_Info_Block for every 5000 byte chunk of $content.
 * NOTE(review): the function is declared void and the blocks are neither
 * returned nor stored, so callers currently receive nothing - confirm intent.
 *
 * Fixed: "$x ?? 0 / $lines" parsed as "$x ?? (0 / $lines)" so the counts were
 * never divided by the line count, and slash_freq was read from CHAR_HASH.
 *
 * @param string $content
 * @param array $compare_freq frequency table passed to char_freq_analysis()
 * @return void
 */
function get_plugin_file_info(string $content, array $compare_freq): void
{
    $size = strlen($content);
    for ($index = 0; $index < $size; $index += 5000) {
        $block = substr($content, $index, 5000);
        $char_counts = count_chars($block, 1);
        // count_chars mode 1 only lists bytes that occur, so a chunk without newlines counts as 1 line
        $lines = $char_counts[CHAR_NL] ?? 1;

        $info = new File_Info_Block();
        $info->hash_freq = ($char_counts[CHAR_HASH] ?? 0) / $lines;
        $info->slash_freq = ($char_counts[CHAR_SLASH] ?? 0) / $lines;
        $info->indent_level = get_line_indents($block);
        $info->frequency = char_freq_analysis($char_counts, $compare_freq);
        $info->lines = $lines;
    }
}
/**
 * compute the average indentation of $input as one packed integer.
 * Leading whitespace in front of a letter or "$" is collected, the space and
 * tab counts are averaged over the number of newlines (at least 1) and
 * rounded down. Spaces occupy the low 15 bits, tabs are shifted left by 15.
 * @param string $input
 * @return int packed indentation, see indent_to_space()
 */
function get_line_indents(string $input): int
{
    preg_match_all("/^\s+[a-zA-Z\$]/mis", $input, $found);

    $space_total = 0;
    $tab_total = 0;
    foreach ($found[0] as $leading) {
        $bytes = count_chars($leading, 1);
        $space_total += $bytes[32] ?? 0;
        $tab_total += $bytes[9] ?? 0;
    }

    $line_count = max(1, substr_count($input, "\n"));
    // integer division equals floor() for these non-negative counts
    $space_part = min(intdiv($space_total, $line_count), 0x7FFF);
    $tab_part = min(intdiv($tab_total, $line_count), 0x7FFF) << 15;

    return $space_part | $tab_part;
}
/**
 * extract the space count (low 15 bits) from a packed indentation value
 * @param int $input value produced by get_line_indents()
 * @return int
 */
function indent_to_space(int $input): int
{
    $space_mask = 0x7FFF;
    return $input & $space_mask;
}
/**
 * extract the tab count from a packed indentation value.
 * get_line_indents() stores the tab count shifted left by 15 bits, so it is
 * recovered by shifting right. The previous code shifted left a second time
 * and never returned the tab count.
 * @param int $input value produced by get_line_indents()
 * @return int
 */
function indent_to_tab(int $input): int
{
    return ($input >> 15) & 0x7FFF;
}
/**
 * find list of malware in a file.
 * Reads the file and hands its content to cms_find_malware_str(), a missing
 * file yields an empty list.
 * @param string $path file to inspect
 * @param bool $known passed through to cms_find_malware_str()
 * @param int $batch_size not used by this function
 * @param ScanConfig $config
 * @return Malware_List
 */
function cms_find_malware(string $path, bool $known, int $batch_size, ScanConfig $config): Malware_List
{
    $file = FileData::new($path);
    if ($file->exists) {
        return cms_find_malware_str($file->raw(), $known, $path, $config);
    }

    debug("cms check file does not exist [%s]", $path);
    return new Malware_List();
}
/**
 * result of frequency_analysis(): the sample with the highest score and that score
 */
class High_Frequency
{
/** @var string $content the highest scoring sample, "" when no sample scored above 0 */
public $content = "";
/** @var float $frequency score of $content as returned by char_freq_analysis() */
public $frequency = 0.0;
}
/**
 * split $content into consecutive 4096 byte samples, score each one with
 * char_freq_analysis() and return the sample with the highest score.
 * On equal scores the earliest sample wins.
 * @param array $frequency_table - the comparison table
 * @param string $content - the content to check
 * @return High_Frequency content "" and frequency 0.0 when no sample scores above 0
 */
function frequency_analysis(array $frequency_table, string $content): High_Frequency
{
    $best = new High_Frequency();
    $length = strlen($content);

    for ($offset = 0; $offset < $length; $offset += 4096) {
        $chunk = substr($content, $offset, 4096);
        $score = char_freq_analysis(count_chars($chunk, 1), $frequency_table);
        if ($score > $best->frequency) {
            $best->frequency = $score;
            $best->content = $chunk;
        }
    }

    return $best;
}
/**
 * build a factory that creates Malware objects for one file. The file level
 * metrics are bound once, the returned closure only takes the finding itself.
 *
 * Malware::$per_unknown and Malware::$php_count are documented as int, but the
 * parameters here are declared string and bool. The values are now cast to int
 * before they are stored so every Malware carries consistent types.
 * NOTE(review): because $php_count is declared bool, any count above 1 is
 * already reduced to 1 by the time it reaches this function.
 *
 * @param string $path file the findings belong to
 * @param string $per_unknown percentage of unknown variable-name parts
 * @param bool $php_count
 * @param float $frequency character frequency score of the file
 * @param int $max_len length of the longest line
 * @param int $size file size in bytes
 * @param bool $known
 * @return callable fn(string $content, string $pre, string $post, string $note): Malware
 */
function malware_creator(string $path, string $per_unknown, bool $php_count, $frequency, $max_len, $size, $known): callable
{
    return function (string $content, string $pre, string $post, string $note) use ($known, $per_unknown, $php_count, $frequency, $max_len, $size, $path): Malware {
        $malware = new Malware();
        // the finding and its surrounding text
        $malware->pre_text = $pre;
        $malware->post_text = $post;
        $malware->content = $content;
        $malware->note = $note;
        $malware->content_indent = get_line_indents($content);
        $malware->pre_indent = get_line_indents($pre);
        $malware->post_indent = get_line_indents($post);
        // file level data
        $malware->path = $path;
        $malware->known = $known;
        $malware->ctime = filectime($path);
        $malware->frequency = $frequency;
        $malware->max_length = $max_len;
        $malware->file_size = $size;
        $malware->per_unknown = (int)$per_unknown;
        $malware->php_count = (int)$php_count;
        return $malware;
    };
}
/**
 * return the lower-cased name parts of all variables used in $contents.
 * Variable names are split on "_" and "$", and also in front of capital
 * letters when fewer than a third of the characters are capitals.
 * @param string $contents php source, must include the opening php tag to be tokenized as code
 * @return array array keys are the name parts, every value is 1
 */
function get_names(string $contents) {
    $names = [];

    foreach (token_get_all($contents) as $token) {
        if ($token[0] !== T_VARIABLE) {
            continue;
        }
        $variable = $token[1];
        $length = strlen($variable);
        $capitals = strlen(preg_replace("/[^A-Z]/", "", $variable));
        // split on caps only if we have mostly lower case. prevents splitting EvIlStR into single chars
        $mostly_lower = ($length / 3) > $capitals;
        $pattern = $mostly_lower ? '/((?=[A-Z])|_|\$)/' : '/(?=_|\$)/';

        foreach (preg_split($pattern, $variable) as $part) {
            if (empty($part)) {
                continue;
            }
            $key = strtolower(str_replace('$', '', $part));
            // NOTE(review): $key is lower case, so it can never equal these upper case names
            if (in_array($key, ['_COOKIE', '_POST', '_GET'])) {
                continue;
            }
            $names[$key] = 1;
        }
    }

    return $names;
}
/**
 * compute the percentage of variable-name parts that are missing from the allow list.
 * Every key of $token_names is split on capitals, "_" and "$", parts shorter
 * than 2 characters are ignored.
 * @param array $token_names array keys are the names to check, see get_names()
 * @param array $allow_tokens array keys are the allowed name parts
 * @return int 0-100, 0 when there was nothing to check
 */
function not_found_percentage(array $token_names, array $allow_tokens) : int {
    $hits = 0;
    $misses = 0;
    // comma separated part names, only used for the debug line
    $hit_names = "";
    $miss_names = "";

    foreach ($token_names as $token => $unused) {
        // skip the super common tokens
        if ($token == '$_COOKIE' || $token == '$_POST' || $token == '$_GET') { continue; }
        foreach (preg_split('/((?=[A-Z])|_|\$)/', $token) as $part) {
            // skip variable names that are 1 char long
            if (strlen($part) < 2) { continue; }
            if (isset($allow_tokens[$part])) {
                $hit_names .= ", $part";
                $hits++;
            } else {
                $miss_names .= ", $part";
                $misses++;
            }
        }
    }

    // without any usable name parts no percentage can be computed
    if ($hits + $misses == 0) {
        return 0;
    }
    $percent = round(($misses / ($hits + $misses)) * 100);
    debug("F[%s] N[%s] %d/%d = per:%d", $hit_names, $miss_names, $hits, $misses, $percent);
    return $percent;
}
/**
 * compute the file level metrics (frequency score, percentage of unknown
 * variable-name parts, longest line) for $content without searching for
 * individual findings. DUP from cms_find_malware_str.
 *
 * The token and frequency tables are cached in static variables. They are
 * reloaded only when missing or when the backing file was modified within the
 * last 20 seconds - the same rule cms_find_malware_str() uses. The previous
 * test (time() > filemtime()) was true for any file not dated in the future,
 * so both files were re-read and decoded on every call.
 *
 * @param string $content the (partial) file content to analyze
 * @param bool $known true when $content is a diff of a known file and carries no php tags
 * @param string $file_name path of the file, used for ctime and size
 * @param ScanConfig $config not used by this function
 * @return Malware frequency 0.0 and per_unknown 0 when the content is shorter
 *   than 10 bytes or is unknown and contains no php tag
 * @throws Exception
 */
function malware_metrics(string $content, bool $known, string $file_name, ScanConfig $config): Malware
{
    static $allow_tokens = null;
    static $freq = null;

    $size = strlen($content);
    $m = new Malware();
    $m->ctime = filectime($file_name);
    $m->file_size = filesize($file_name);
    $m->frequency = 0.0;
    $m->max_length = $size;
    $m->path = $file_name;
    $m->per_unknown = 0;
    $m->unique = random_str(10);
    if ($size < 10) {
        return $m;
    }
    debug("search for malware in file [%s] len:%d", $file_name, $size);

    // only load the allow tokens 1x, a tokens.json in the working directory takes precedence
    $file = (file_exists(getcwd() . "/tokens.json")) ? getcwd() . "/tokens.json" : WAF_ROOT . "cache/tokens2.json";
    if (empty($allow_tokens) || time() < filemtime($file) + 20) {
        debug("reload allow tokens");
        $allow_tokens = FileData::new($file)->read()->un_json()->lines;
    }
    $file = (file_exists(getcwd() . "/frequency.json")) ? getcwd() . "/frequency.json" : WAF_ROOT . "cache/char_frequency.json";
    if ($freq === null || time() < filemtime($file) + 20) {
        $freq = un_json(FileData::new($file)->raw());
    }

    // if file is known, then we will only have a partial diff and we should not try to find tags
    // since there will be none.
    // if the file is unknown, we need a php tag to start with
    if (!$known && preg_match_all("/\<\?.*?([^\"']\?>[^\"']|$)/isDSu", $content, $matches)) {
        // join all php sections, with the open / close tags removed
        $c1 = array_reduce(array_values($matches[0]), function ($carry, $x) {
            return $carry . preg_replace("/(\<\?php|\?\>)/", "", $x);
        }, "");
        // clean up common junk
        $c1 = preg_replace("/\s*_\w\s*\(\s*[\'\"][^\'\"]+.*\;/", "", $c1);
    }
    // if the file is unknown and has no php tag, we can ignore it
    else if (!$known) {
        return $m;
    }
    // the file is known, so we can just use the content since the DIFF function only returns code with php functions
    else {
        $c1 = $content;
    }

    // trim off comments and svg paths
    // TODO: switch to php token parse...
    $c2 = preg_replace("/(\/\/|#).*$/m", "", $c1);
    $c3 = preg_replace("/\/\*.*?\*\//ms", "", $c2);
    $c3 = preg_replace("/\<path\s+.*?[\<\>;]/ms", "", $c3);

    $frequency = frequency_analysis($freq, $c3);
    $token_names = get_names("<?php\n$c1");
    $not_found_per = not_found_percentage($token_names, $allow_tokens);

    // find the longest line, explode() always returns at least one element
    $lines = explode("\n", "$c3\n");
    $max_len = max(array_map('strlen', $lines));

    $m->frequency = $frequency->frequency;
    $m->per_unknown = $not_found_per;
    $m->max_length = $max_len;
    return $m;
}
function cms_find_malware_str(string $content, bool $known, string $file_name, ScanConfig $config): Malware_List
{
static $allow_tokens = null;
static $freq = null;
$size = strlen($content);
$list = new Malware_List();
if ($size < 10) {
return $list;
}
/*
if (contains($file_name, "malware-test")) {
xdebug_break();
}
*/
debug("search for malware in file [%s] len:%d", $file_name, $size);
// only load the allow tokens 1x
$file = (file_exists(getcwd() . "/tokens.json")) ? getcwd() . "/tokens.json" : WAF_ROOT . "cache/tokens2.json";
if ($allow_tokens === null || time() < filemtime($file) + 20) {
debug("reload allow tokens");
$allow_tokens = FileData::new($file)->read()->un_json()->lines;
}
$file = (file_exists(getcwd() . "/frequency.json")) ? getcwd() . "/frequency.json" : WAF_ROOT . "cache/char_frequency.json";
if ($freq === null || time() < filemtime($file) + 20) {
$freq = un_json(FileData::new($file)->raw());
}
// if file is known, then we will only have a partial diff and we should not try to find tags
// since there will be none.
// if the file is unknown with no tags, we can return early!
// if the file is unknown, we need a php tag to start with
if (!$known && preg_match_all("/\<\?.*?([^\"']\?>[^\"']|$)/isDSu", $content, $matches)) {
$c1 = array_reduce(array_values($matches[0]), function ($carry, $x) { // use ($file_name, $content) {
return $carry . preg_replace("/(\<\?php|\?\>)/", "", $x);
}, "");
}
// if the file is unknown and has no php tag, we can ignore it
else if (!$known) {
// echo "search for malware in file [$file_name]\n$content\n\n";
// bail out early, it's not actually php SMH
return $list;
}
// the file is known, so we can just use the content since the DIFF function only returns code with php functions
else {
$c1 = $content;
}
// trim off comments and svg paths
// TODO: switch to php token parse...
// we are already parsing tokens in get_names, so we can just use that
$php_count = 1;
$line_no = 0;
// remove comments
$c2 = preg_replace("/(\/\/|#).*$/m", "", $c1);
$c3 = preg_replace("/\/\*.*?\*\//ms", "", $c2);
$c3 = preg_replace("/\<path\s+.*?[\<\>;]/ms", "", $c3);
// clean up common junk
$c3 = preg_replace("/\s*_\w\s*\(\s*[\'\"][^\'\"]+.*\;\s*[\'\"]?\>?/", "", $c3);
$code_len = strlen($c3);
$frequency = frequency_analysis($freq, $c3);
/*
// get the lowest frequency based on any known existing allowed frequency tables
if ($frequency->frequency > $config->fn_freq_limit || $frequency->frequency > $config->freq_limit) {
$file_list = glob(WAF_ROOT . "cache/char_frequency_*.json");
foreach ($file_list as $file) {
$freq = un_json(FileData::new($file)->raw());
$frequency2 = frequency_analysis($freq, $c3);
if ($frequency2->frequency < $frequency->frequency) {
$frequency = $frequency2;
}
}
}
*/
$token_names = get_names("<?php\n$c1");
$not_found_per = not_found_percentage($token_names, $allow_tokens);
// find the longest line
$lines = explode("\n", "$c3\n");
$max_line = "";
$max_len = array_reduce($lines, function ($carry, $x) use (&$line_no, &$max_line) {
static $ctr = 0;
$ctr++;
$len = strlen($x);
if ($len > $carry) {
$line_no = $ctr;
$max_line = $x;
return $len;
}
return $carry;
}, 0);
$malware_factory = malware_creator($file_name, $not_found_per, $php_count, $frequency->frequency, $max_len, $size, $known);
// long line malware
if ($config->line_limit > 0 && $max_len > $config->line_limit) {
debug("LONG LINE MALWARE");
/** @var Malware $m */
$m = $malware_factory(
substr($max_line, 0, 4096),
substr($lines[$line_no-(max($line_no-1, 0))], 0, 1024),
substr($lines[(min($line_no+1, count($lines)-1))], 0, 1024),
"Long line: $max_len characters");
$list->add($m);
}
// we can bail out early iff we have already detected malware
/*
if ($list->count() > 0) {
debug("BASIC MALWARE: maxlen: %d, freq %d, not found %d [%s]", $max_len, $frequency->frequency, $not_found_per, $file_name);
return $list;
}
debug("DETECT MALWARE: maxlen: %d, freq %d, not found %d [%s](%d)", $max_len, $frequency->frequency, $not_found_per, $file_name, $code_len);
*/
// build the function search regex
$extra_functions = UPLOAD_FN;
// check for ord() and chr() call only if file has malware markers...
if ($not_found_per > 40 || $frequency->frequency > 20) {
$extra_functions .= "|ord";
}
if ($code_len < 5192 || $not_found_per > $config->fn_random_name_per || $max_len > $config->fn_line_limit || $frequency->frequency > $config->fn_freq_limit) {
debug("adding dynamic function names to regex found:%d, len:%d, freq:%d file[%s]", $not_found_per, $max_len, $frequency->frequency, $file_name);
$extra_functions .= VAR_FN . USER_FN;
}
if ($config->wp_func) {
$extra_functions .= WP_FN;
}
$regex = sprintf(FN1_RX, $extra_functions);
if (preg_match_all($regex, $c3, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
for ($i = 0; $i < count($matches) && count($list) < 2; $i++) {
$fn_name = $matches[$i][0][0];
// let's inspect 12 characters before the function name, to see if it is a function definition
$fun_len = max(0, $matches[$i][0][1] - 12);
$inspect_str = substr($c3, $fun_len, 12);
if (contains($inspect_str, "function")) {
continue;
}
// check all found matches for header with location redirect
if (stripos($fn_name, "header") !== false) {
if (stripos($fn_name, "location") === false &&
stripos($fn_name, "$\w") === false) {
debug("remove header missing location [%s]", $file_name);
continue;
}
}
// if the function call contains call with raw user input, flag it
if (stripos($fn_name, '$_') !== false) {
$m = $malware_factory(
substr($matches[$i][0][0], 0, 1024),
offset_pre_text($c3, $matches[$i][0][1], 512, $size),
offset_post_text($c3, $matches[$i][0][1], 512, $size),
"Dynamic function call with raw user input: $fn_name");
$list->add($m);
continue;
}
// if the function call is dynamic, make sure it passes the minimum frequency requirements
if (strstr($fn_name, "call_user") !== false || strpos(substr($fn_name, 0, 4), "$") !== false) {
if ($config->fn_freq_limit <= $frequency->frequency || $config->fn_random_name_per <= $not_found_per || $config->fn_line_limit <= $max_len) {
$m = $malware_factory(
substr($lines[$line_no], 0, 1024),
offset_pre_text($c3, $matches[$i][0][1], 512, $size),
offset_post_text($c3, $matches[$i][0][1], 512, $size),
"Dangerous dynamic function call: $fn_name");// . $frequency->frequency . ", " . $not_found_per . ", " . $max_len . " | " .$config->fn_line_limit);
$list->add($m);
} else {
debug("skipping dynamic function call [%s]", $file_name);
}
continue;
}
//debug("found malware: %s, [%s] size:(%d)", $file_name, $matches[$i][0][0], $code_len);
// $samples[] = [$matches[$i][0][0], $matches[$i][0][1], "dangerous function call"];
$m = $malware_factory(
substr($lines[$line_no], 0, 3196),
offset_pre_text($c3, $matches[$i][0][1], 512, $size),
offset_post_text($c3, $matches[$i][0][1], 512, $size),
"Dangerous function call: $fn_name");
$list->add($m);
}
// compact the array
debug("%s has %d malware after step 2", $file_name, count($list));
}
// no malware found, find malware in non php include files
if (count($list) < 1 && $config->non_php) {
//debug("search for include malware in %s", $c3);
if (preg_match_all("/^[^|;][^\w\'\"\$]*(?:include|require)(?:_once)?\s*([^\);]+)\s*\)\s*?;/mis", $c3, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
// debug("INCLUDE MALWARE FOUND [%s]", $matches[0][1][0]);
foreach ($matches as $inc) {
$check = $inc[1][0];
/*
$cnt = count_chars($check);
$low = array_filter($cnt, function ($x, $y) {
return $y < 65;
}, ARRAY_FILTER_USE_BOTH);
$high = array_filter($cnt, function ($x, $y) {
return $y >= 65;
}, ARRAY_FILTER_USE_BOTH);
$sum_low = array_sum($low);
$sum_high = array_sum($high);
if ($sum_low > 5 && $sum_low > $sum_high) {
print_r($matches);
printf ("sum low/high [%d/%d]\n", $sum_low, $sum_high);
debug("sum low/high [%d/%d]", $sum_low, $sum_high);
$m = $malware_factory(
$check,
offset_pre_text($c3, $inc[0][1], 512, $size),
offset_post_text($c3, $inc[0][1], 512, $size),
"Including malware PHP file: $check");
$list->add($m);
}
*/
if (icontains($check, ["\x", "chr(", "ord(", "base64("])) {
$m = $malware_factory(
$check,
offset_pre_text($c3, $inc[0][1], 512, $size),
offset_post_text($c3, $inc[0][1], 512, $size),
"Including malware PHP file: $check");
$list->add($m);
}
else {
debug("check [%s]", $check);
if (preg_match("/\.(jpg|jpeg|gif|ico|txt|png|webp)\s*['\"]/mis", $check)) {
$m = $malware_factory(
$check,
offset_pre_text($c3, $inc[0][1], 512, $size),
offset_post_text($c3, $inc[0][1], 512, $size),
"Dangerous include file: $check");
$list->add($m);
debug("found included image [%d]", count($list));
}
}
}
}
}
// use the custom search expression
if (strlen($config->extra_regex) > 1) {
if ($config->extra_regex[0] == "/") {
if (preg_match($config->extra_regex, $c3, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
foreach ($matches as $inc) {
$m = $malware_factory(
$inc[0][0],
offset_pre_text($c3, $inc[0][1], 512, $size),
offset_post_text($c3, $inc[0][1], 512, $size),
"custom regex");
$list->add($m);
}
}
} else {
if ($pos = stripos($c3, $config->extra_regex)) {
$m = $malware_factory(
offset_post_text($c3, $pos, 96, $size),
offset_pre_text($c3, $pos, 512, $size),
offset_post_text($c3, $pos, 512, $size),
"custom regex");
$list->add($m);
}
}
}
/*
// if frequency is high, or the malware is at the beginning/end of the file, return it
if (count($samples) > 0) {
foreach ($samples as $sample) {
//$m = $malware_factory($sample[0], $matches[1], $matches[2], "double php tag");
//$list->add($m);
$offset = $sample[1];
$malware = new Malware();
$malware->content_offset = $offset;
$malware->file_size = $size;
$malware->php_count = $php_count;
$malware->content = $sample[0];
$malware->location = 1; //$location;
$malware->file_size = $size;
$malware->content_indent = get_line_indents($malware->content);
$malware->note = $sample[2];
$match_len = strlen($malware->content);
$pre1 = max(0, $offset - 256);
$pre2 = min(256, $offset);
$malware->pre_text = substr($c3, $pre1, $pre2);
$malware->pre_indent = get_line_indents($malware->pre_text);
$offset += $match_len;
$post2 = min(256, $size - $offset);
$malware->post_text = substr($c3, $offset, $post2);
$tmp = substr($c3, $offset + 2048, min(256, $size - $offset - 2048));
$malware->post_indent = get_line_indents($tmp);
if ($malware->content_indent < $malware->pre_indent && $malware->content_indent < $malware->post_indent) {
$frequency->frequency += 3.0;
}
$malware->frequency = round($frequency->frequency, 2);
$list->add($malware);
//$x = json_encode($list, JSON_PRETTY_PRINT);
if (count($list) >= 10) {
return $list;
}
}
}
debug("malware size [%d] [%s]", count($list), $file_name);
// todo: update to UTF8 chars
// TODO: add FN2_RX
$dynamic_fn = "/.{0,192}(\\$[_A-Za-z]\p{L}*\s*\([^;]+;).{0,192}/sim";
if (preg_match_all($dynamic_fn, $content, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER, max(0, $offset-128))) {
if ($frequency == -1) {
$path = WAF_ROOT."cache/char_frequency.json";
$freq = un_json(FileData::new($path)->raw());
// is the malware near beginning or end? - ALWAYS REPORT
// check indentation level, functions per line and comments.
$frequency = char_freq_analysis($content, $freq);
}
$malware[] = $matches;
}
// refactor to simpler format
$final = [];
foreach($malware as $fn) {
foreach ($fn as $instance) {
$final[] = [$instance[0][1], $instance[0][0], $instance[1][1], $instance[1][0]];
}
}
*/
if ($config->freq_limit > 0) {
// first, lets find all of the files that have some unusual character frequencies
if ($frequency->frequency > $config->freq_limit) {
$m = $malware_factory(substr($frequency->content, 0, 2048), "", "", "unusual character frequency");
$m->frequency = $frequency->frequency;
debug("FREQUENCY MALWARE");
$list->add($m);
}
}
// check the percentage of unknown variable names
if ($config->random_name_per > 0) {
// if we have a lot of unknown tokens, we can assume it's malware
$num_tokens = count($token_names);
if ($not_found_per > $config->random_name_per && $num_tokens > 3) {
// remove known names from list
foreach ($token_names as $name) { if (isset($allow_tokens[$name])) { unset($token_names[$name]); } }
$list->add($malware_factory(substr($content, 0, 4096), "", "", "{$not_found_per}% Unknown variable names: " . substr(join(", ", array_keys($token_names)), 0, 2048)));
}
}
return $list;
}
/**
 * Return up to $len bytes of $content that sit immediately before $offset.
 *
 * @param string $content text to slice
 * @param int $offset byte offset the returned text must end at
 * @param int $len maximum number of bytes to return
 * @param int $strlen accepted for signature compatibility; not used here
 * @return string the text preceding $offset (empty when $offset is 0 or less)
 */
function offset_pre_text(string $content, int $offset, int $len, int $strlen) {
    // never read before the start of the string and never ask substr for a negative length
    $offset = max(0, $offset);
    // honor the caller's limit, but do not reach further back than the start of the string
    $len = max(0, min($len, $offset));
    return substr($content, $offset - $len, $len);
}
/**
 * Return the slice of $content that begins at $offset and is at most $len bytes long.
 *
 * @param string $content text to slice
 * @param int $offset byte offset the slice starts at
 * @param int $len maximum number of bytes to return
 * @param int $strlen accepted for signature compatibility; not used here
 */
function offset_post_text(string $content, int $offset, int $len, int $strlen) {
    $start = $offset;
    $max_bytes = $len;
    $following = substr($content, $start, $max_bytes);
    return $following;
}
/**
 * Format a byte count as kilobytes with one decimal place, e.g. "1.5Kb".
 * Counts between 1 and 129 bytes are raised to 130 so that any non-empty
 * size renders as at least "0.1Kb".
 *
 * @param mixed $bytes the byte count (cast to int before dividing)
 * @return string the size followed by the "Kb" suffix
 */
function bytes_to_kb($bytes): string
{
    $needs_floor = ($bytes > 0 && $bytes < 130);
    $effective = $needs_floor ? 130 : $bytes;
    $kilobytes = round((int)$effective / 1024, 1);
    return $kilobytes . "Kb";
}
/**
 * take any function $fn and return a function that will accumulate the result.
 * $fn receives the accumulator as its first parameter (NULL on the first call)
 * followed by every argument passed to the returned function, and its return
 * value becomes the new accumulator.
 * passing ACTION_RETURN as the first argument returns the accumulated result
 * passing ACTION_CLEAN as the first argument resets the accumulator to NULL
 * (in both cases $fn is NOT called)
 * @param callable $fn the reducer: fn($accumulator, ...$args)
 * @return callable the accumulator function, returns NULL unless called with ACTION_RETURN
 */
function accrue_reduce(callable $fn): callable
{
    return function (...$args) use ($fn) {
        // static local of the returned closure: holds the running accumulator between calls
        static $result = NULL;
        if (isset($args[0])) {
            if ($args[0] === ACTION_RETURN) {
                return $result;
            }
            if ($args[0] === ACTION_CLEAN) {
                // stop here: a control action is not data and must not be fed to the reducer
                $result = NULL;
                return NULL;
            }
        }
        $result = $fn($result, ...$args);
        return NULL;
    };
}
/**
 * reducer that renders diff opcodes into a string, keeping only inserted text.
 * an insert ('i') is appended to $carry only when $from contains something that
 * looks like a function call (a word followed by an opening parenthesis).
 * every other opcode returns $carry unchanged.
 * @param null|string $carry text accumulated so far (NULL on the first call)
 * @param string $opcode one of i, d, c, r, z
 * @param string $from the string the slice is taken from
 * @param int $from_offset starting offset of the slice in $from
 * @param int $from_len length of the slice
 * @return string the updated accumulator, never NULL
 */
function opcode_add_only_php(?string $carry, string $opcode, string $from, int $from_offset, int $from_len): string
{
    assert(strlen($from) >= ($from_offset + $from_len), "from_offset + from_len is greater than the length of the string");
    assert(in_array($opcode, ['i', 'd', 'c', 'r', 'z'], true), "invalid opcode");

    // only NULL becomes "": an accumulated "0" is real content and must be kept
    $carry = $carry ?? "";

    // NOTE(review): the pattern is tested against all of $from, not only the
    // slice being inserted - confirm this is the intended filter
    if ($opcode === 'i' && preg_match("/\w+\s*\(/", $from)) {
        return $carry . substr($from, $from_offset, $from_len);
    }
    return $carry;
}
/**
 * render diff opcodes into a buffer that persists between calls.
 * 'i' appends a slice of $from to the buffer, 'z' empties the buffer and
 * 'r' returns the buffer. any other opcode is ignored.
 * @param mixed $opcode the opcode to render (must be one of i, d, c, r, z)
 * @param mixed $from the string the slice is taken from
 * @param mixed $from_offset starting offset
 * @param mixed $from_len string length from offset
 * @return string|void the buffer for opcode 'r', nothing otherwise
 */
function opcode_add_only($opcode, $from, $from_offset, $from_len)
{
    static $text = "";
    assert(strlen($from) >= ($from_offset + $from_len), "from_offset + from_len is greater than the length of the string");
    assert(in_array($opcode, ['i', 'd', 'c', 'r', 'z']), "invalid opcode");

    if ($opcode === 'r') {
        return $text;
    }
    if ($opcode === 'z') {
        $text = "";
        return;
    }
    if ($opcode === 'i') {
        // start up to 6 bytes early so a preceding <?php tag is captured
        // NOTE(review): the length is not extended by the same amount, so the
        // tail of the slice is dropped - confirm this is intended
        $start = max(0, $from_offset - 6);
        $text .= substr($from, $start, $from_len);
    }
}
/**
 * add display and lookup info to a file hash record.
 * adds: ver, url, machine_date, kb1, kb2, known, real, bgclass, icon and
 * icon_class. unless the record has r === "PASS", also starts the download of
 * the reference source from url and stores it in 'ch':
 *  - with a multi handle: the request handle returned by http3(), which is
 *    also registered on $mh
 *  - without one: the response content returned by http2()
 * @param mixed $mh curl multi handle, or an empty value to download synchronously
 * @param array $hash hash record; reads file_path, rel_path, type, ctime, size
 *   and optionally name, size2 and r
 * @return array the enriched hash record
 */
function enrich_hashes($mh, array $hash): array
{
    debug("enrich1 [%s]", json_encode($hash));
    // TODO: trim down the data in $hash

    // abstracted source cms mapping
    $hash['ver'] = version_from_path($hash['file_path']);
    $path_to_source_fn = find_fn("path_to_source");
    $hash['url'] = $path_to_source_fn($hash["rel_path"], $hash["type"], $hash["ver"], $hash["name"] ?? null);

    $hash['machine_date'] = machine_date($hash['ctime']);
    $hash['kb1'] = bytes_to_kb($hash['size']);
    $hash['kb2'] = bytes_to_kb($hash['size2'] ?? 0);

    // parenthesized on purpose: `??` binds looser than `>`, so without the
    // parentheses the comparison would apply to the literal 0 only
    $is_known = ($hash['size2'] ?? 0) > 0;
    $hash['known'] = $is_known ? "WordPress file " : "Unknown file";
    $hash['real'] = $is_known;
    $hash['bgclass'] = $is_known ? "bg-success-soft" : "bg-danger-soft";
    $hash['icon'] = $is_known ? "check" : "x";
    $hash['icon_class'] = $is_known ? "success" : "danger";

    // records already marked PASS do not need the reference source
    if (!isset($hash['r']) || $hash['r'] !== "PASS") {
        if (!empty($mh)) {
            $ch = http3("GET", $hash['url']);
            $hash['ch'] = $ch;
            curl_multi_add_handle($mh, $ch);
        } else {
            $response = http2("GET", $hash['url']);
            $hash['ch'] = $response->content;
        }
    }

    return $hash;
}
/**
 * second enrichment pass: diff the downloaded reference source against the
 * local file and scan the added text for malware.
 * when the record has a 'ch' entry, its content is read (from the multi handle
 * when $mh is set, otherwise 'ch' is the content itself), 'ch' is removed,
 * the inserted text of the diff is stored in 'diff' and, when that text is
 * longer than 10 bytes, the scan result is stored in 'malware'.
 * 'malware' is always set on return (an empty Malware_List when nothing ran).
 * @param array $hash hash record produced by enrich_hashes()
 * @param mixed $mh curl multi handle, or an empty value when 'ch' holds the content
 * @param null|ScanConfig $config scan settings, loaded from "malware_config" when NULL
 * @return array the updated hash record
 */
function enrich_hashes2(array $hash, $mh, ?ScanConfig $config = null) : array {
    $known = true;

    if (isset($hash['ch'])) {
        if (!empty($mh)) {
            // cast: a failed transfer may not yield a string
            $content = (string)curl_multi_getcontent($hash['ch']);
            $l = strlen($content);
            curl_multi_remove_handle($mh, $hash['ch']);
            debug("multi http3 content len: %d", $l);
        } else {
            $content = (string)$hash['ch'];
            $l = strlen($content);
            debug("raw http3 content len: %d", $l);
        }

        // a short "404 Not Found" body means there is no reference copy of this file
        if ($l < 300) {
            if (stristr($content, "404 Not Found") !== false) {
                $known = false;
                $content = "";
            }
        }
        unset($hash['ch']);

        // an unreadable local file is diffed as empty instead of passing false along
        $local = file_get_contents($hash['file_path']);
        if ($local === false) {
            $local = "";
        }

        // collect only the text that was inserted relative to the reference copy
        $fn = accrue_reduce('\BitFire\opcode_add_only_php');
        $op_codes = FineDiff::getDiffOpcodes($content, $local, FineDiff::$paragraphGranularity);
        FineDiff::renderFromOpcodes($content, $op_codes, $fn);
        $text = $fn(ACTION_RETURN);
        $hash['diff'] = $text;

        // the accumulator is still NULL when no opcode was rendered
        $diff_len = strlen((string)$text);
        if ($diff_len > 10) {
            $filename = basename($hash['file_path']);
            debug("diff len: %d [%s] = [%s]\n%s\n\n", $diff_len, $filename, $hash['url'], substr($text, 0, 4096));
            if ($config === null) {
                $config = parse_scan_config(CFG::arr("malware_config"));
            }
            $hash['malware'] = cms_find_malware_str($text, $known, $hash['file_path'], $config);
        }
    }

    if (!isset($hash['malware'])) {
        $hash['malware'] = new Malware_List();
    }

    return $hash;
}
/**
 * fetch the request profile for a sanitized path.
 * lookup order: the cache entry "profile:<crc32 of path>", then the file
 * cache/profile/<path>.txt under WAF_ROOT, then PROFILE_INIT.
 * a profile file without the "^a" counter is replaced by PROFILE_INIT with
 * 'h' set to the current HTTP host ('na' when unknown).
 * @param string $path sanitized path name identifying the profile
 * @return array the profile
 */
function load_cms_profile(string $path): array
{
    $profile_file = \BitFire\WAF_ROOT . "cache/profile/{$path}.txt";
    $cache_key = "profile:" . crc32($path);

    $cached = CacheStorage::get_instance()->load_data($cache_key, null);
    if (!empty($cached)) {
        return $cached;
    }

    // nothing cached and nothing on disk: start from the initial profile
    if (!file_exists($profile_file)) {
        return PROFILE_INIT;
    }

    // read the profile file and decode it
    $stored = FileData::new($profile_file)->read()->un_json()->lines;
    if (isset($stored["^a"])) {
        return $stored;
    }

    // the stored data has no admin counter: start over and record the host
    $fresh = PROFILE_INIT;
    $fresh['h'] = $_SERVER['HTTP_HOST'] ?? 'na';
    return $fresh;
}
/**
 * build a filesystem-safe profile name from the request path.
 * "../" sequences are removed, the first action parameter present in the
 * query string (from ACTION_PARAMS) is appended as ^name^value, "/" becomes
 * "~" and finally every character other than a-z, A-Z, 0-9, ".", "_" and "-"
 * is replaced by "#" (this includes the "~" and "^" separators added above).
 * @param Request $request the request to name
 * @return string the sanitized name
 */
function make_sane_path(Request $request): string
{
    // todo: add support for multiple extensions, or no extension
    $action_names = find_const_arr("ACTION_PARAMS", ["do", "page", "action", "screen-id"]);

    // only the first matching action parameter becomes part of the name
    $action_suffix = "";
    foreach ($action_names as $name) {
        if (isset($request->get[$name])) {
            $action_suffix = "^{$name}^{$request->get[$name]}";
            break;
        }
    }

    $combined = str_replace("../", "", $request->path) . $action_suffix;

    // sanitize and filter
    $flattened = trim(str_replace("/", "~", $combined), '/');
    return preg_replace("/[^a-zA-Z0-9\._-]/m", "#", $flattened);
}
/**
 * update the request profile (parameter names/values and hit counters) for a page.
 *
 * CURRENTLY DISABLED: the function returns an empty Effect on its third line,
 * so nothing below that return is ever executed. the remaining code is kept
 * for when profiling is enabled again.
 *
 * make sure we only call this for verified browsers...
 * when enabled, sets profile url name to effect->out
 *
 * @param \BitFire\Request $request the request to profile
 * @param bool $is_admin true to count the hit as an admin hit, false as a user hit
 * @return Effect currently always an empty effect
 */
function cms_build_profile(\BitFire\Request $request, bool $is_admin): Effect
{
// disable profiling except for custom sites
$effect = Effect::new();
return $effect;
// ---- everything below this line is unreachable while the return above is in place ----
// only build profiles for php paths
if (!ends_with($request->path, ".php")) {
return $effect;
}
// only build a profile if we have a config dir
// NOTE(review): defined() expects the NAME of a constant as a string; here the
// value of WAF_INI is passed instead - confirm before re-enabling this code
if (!defined(WAF_INI)) {
return $effect;
}
if (defined(WAF_INI)) {
// profiles are stored in a "profile" directory next to the ini file
$path_dir = dirname(WAF_INI, 1) . "/profile";
if (!file_exists($path_dir)) {
mkdir($path_dir, 0775, true);
}
$sane_path = make_sane_path($request);
if (!empty($path_dir)) {
$profile_path = "$path_dir/{$sane_path}.txt";
}
// something went wrong!
else {
return $effect;
}
}
// TODO: update frequency map
// only profile php pages
// NOTE(review): load_cms_profile() reads from WAF_ROOT . "cache/profile/", while
// $profile_path above points next to WAF_INI - confirm both are the same directory
$profile = load_cms_profile($sane_path);
$m = array_merge($request->get, $request->post);
$filter_params = CFG::arr("filtered_logging");
// update all parameters
foreach ($m as $param => $value) {
// parameters listed in "filtered_logging" are never recorded
if (in_array($param, $filter_params)) {
continue;
}
if (isset($profile[$param])) {
// known parameter: bump the admin ("a") or user ("u") counter
$profile[$param]["a"] += ($is_admin) ? 1 : 0;
$profile[$param]["u"] += ($is_admin) ? 0 : 1;
// remember each distinct value ("v"), up to PROFILE_MAX_VARS values
if (count($profile[$param]["v"]) < PROFILE_MAX_VARS) {
if (!in_array($value, $profile[$param]["v"])) {
$profile[$param]["v"][] = $value;
}
}
} else if (count($profile) < PROFILE_MAX_PARAM) {
// new parameter: only added while the profile has fewer than PROFILE_MAX_PARAM entries
$profile[$param] = ["v" => [$value], "u" => (!$is_admin) ? 1 : 0, "a" => ($is_admin) ? 1 : 0];
}
}
// update page counters: ^a admin hits, ^u user hits, ^g GET requests, ^p POST requests
$profile["^a"] += ($is_admin) ? 1 : 0;
$profile["^u"] += ($is_admin) ? 0 : 1;
$profile["^g"] += $request->method == "GET" ? 1 : 0;
$profile["^p"] += $request->method == "POST" ? 1 : 0;
// when the plugin provides check_user_cap, count each combination of capabilities it reports ("^c")
if (function_exists("\BitFirePlugin\check_user_cap")) {
if (!isset($profile["^c"])) {
$profile["^c"] = [];
}
$used_caps = check_user_cap(null, null, null, null);
if (count($used_caps) > 0 && count($profile["^c"]) < PROFILE_MAX_CAPS) {
// the comma joined capability list is the counter key
$caps = join(",", $used_caps);
if (isset($profile["^c"][$caps])) {
$profile["^c"][$caps]++;
} else {
$profile["^c"][$caps] = 1;
}
}
}
// update cache - SYNC WITH load_cms_profile key
$effect->update(new CacheItem("profile:" . crc32($sane_path), id_fn($profile), id_fn($profile), DAY));
$effect->out($sane_path)->hide_output(true); // report $sane_path to caller. do not output if effect is run
// persist to disk at random: mt_rand(0, 5) has six possible values, so this is 1 in 6
if (mt_rand(0, 5) == 1) {
// strip any possible php tags and make file unreadable...
$content = str_replace("<?", "PHP_OPEN", json_encode($profile));
$effect->file(new FileMod($profile_path, $content, FILE_W, 0));
}
// backup at random (mt_rand(0, 20) has 21 possible values, so 1 in 21), and always when no profile file exists yet
if (mt_rand(0, 20) == 1 || !file_exists($profile_path)) {
// backup the profile after we serve the page
register_shutdown_function(function () use ($sane_path, $profile) {
http2("POST", APP . "profile.php", base64_encode(json_encode(["path" => $sane_path, "profile" => $profile])));
});
}
return $effect;
}
/**
 * fallback file type for cms files: every path is reported as "custom".
 * @OVERRIDE BitFirePlugin\file_type
 * @param string $path path to find type for (not inspected here)
 * @return string file type, always "custom"
 */
function file_type(string $path): string
{
    $default_type = "custom";
    return $default_type;
}
/**
 * build the url of the BitFire hosted source file for custom code bases.
 * the url has the form
 * https://archive.bitfire.co/source/<client_id>/<name>/<ver>/<path>?auth=<pro_key>
 * and every "//" after the scheme is collapsed into a single "/".
 * @param string $name
 * @param string $path
 * @param string $ver
 * @return string the source url
 */
function path_to_source(string $name, string $path, string $ver): string
{
    $segments = [
        "archive.bitfire.co",
        "source",
        CFG::str("client_id", "default"),
        $name,
        $ver,
        $path,
    ];
    $location = join("/", $segments) . "?auth=" . CFG::str("pro_key");
    // the scheme is added after collapsing so its own "//" is not touched
    return "https://" . str_replace("//", "/", $location);
}
/**
 * reducer that finds the version number in the lines of a package.json or
 * readme.txt file. the first version found wins: once $carry holds a version
 * it is returned unchanged for every following line.
 * @param string $carry version found so far, "" when none was found yet
 * @param string $line the next line of the file
 * @return string the version number, or "" when none has been found
 */
function package_to_ver(string $carry, string $line): string
{
    // compare against "" (not empty()) so that a found version of "0" is kept
    if ($carry !== "") {
        return $carry;
    }
    // "version" followed by quotes, colons or whitespace and then digits and dots
    if (preg_match("/version[\'\":\s]+([\d\.]+)/i", $line, $matches)) {
        return $matches[1];
    }
    return $carry;
}