includes/SambaSpool.php
<?php
/*
Copyright 2009-2020 Guillaume Boudreau
This file is part of Greyhole.
Greyhole is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Greyhole is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Greyhole. If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * Helpers around the Samba spool found in /var/spool/greyhole: creation of the
 * in-memory (tmpfs) part of the spool, and parsing of the spooled files into DB tasks.
 */
final class SambaSpool {

    /**
     * Makes sure a tmpfs is mounted on /var/spool/greyhole/mem.
     *
     * The folder is created (mode 0777) when missing. A mount failure is logged
     * using Log::error(), but is otherwise not reported to the caller.
     *
     * @return bool FALSE when the tmpfs was detected as already mounted;
     *              TRUE when a mount was attempted (whether or not it succeeded).
     */
    public static function create_mem_spool() {
        // exec() returns the last line of output, i.e. the number of matching mounts ("0" is falsy).
        $mounted_already = exec('mount | grep /var/spool/greyhole/mem | wc -l');
        if (!$mounted_already && file_exists('/var/spool/greyhole/mem')) {
            // In Docker, mount doesn't list mounts... need to check using df instead:
            exec("cat /proc/1/sched | grep supervisord", $output, $result);
            $is_docker = ($result === 0);
            if ($is_docker) {
                $output = exec("df /var/spool/greyhole/mem | tail -1");
                $mounted_already = preg_match('/^none /', $output);
            }
        }

        if ($mounted_already) {
            return FALSE;
        }

        if (!file_exists('/var/spool/greyhole/mem')) {
            mkdir('/var/spool/greyhole/mem', 0777, TRUE);
            chmod('/var/spool/greyhole/mem', 0777); // mkdir mode is affected by the umask, so we need to ensure proper mode on that folder.
        }

        // Capture stderr and the exit status: mount reports failures through those, not through stdout.
        exec('mount -o size=4M -t tmpfs none /var/spool/greyhole/mem 2>&1', $mount_output, $mount_status);
        if ($mount_status !== 0) {
            $mount_error = implode("\n", $mount_output);
            Log::error("Error mounting tmpfs in /var/spool/greyhole/mem: $mount_error", Log::EVENT_CODE_SPOOL_MOUNT_FAILED);
        }
        return TRUE;
    }

    /**
     * Reads all files spooled in /var/spool/greyhole (oldest first, in batches of
     * at most 10000), converts them into tasks using DBSpool, and deletes each
     * file once it has been handled.
     *
     * Each spooled file contains newline-separated fields: the action, the share,
     * then action-specific fields. For all actions except mkdir, the last field is
     * a result, and records whose result starts with 'failed' are skipped.
     *
     * Only one caller at a time does the parsing: if the ACTION_READ_SAMBA_POOL
     * DB lock can't be acquired, this method returns without doing anything.
     *
     * @return void
     */
    public static function parse_samba_spool() {
        Log::setAction(ACTION_READ_SAMBA_POOL);
        $db_spool = DBSpool::getInstance();

        // Just in case the spool folder is missing!
        if (!file_exists('/var/spool/greyhole/mem')) {
            mkdir('/var/spool/greyhole/mem', 0777, TRUE);
            chmod('/var/spool/greyhole', 0777); // mkdir mode is affected by the umask, so we need to ensure proper mode on that folder.
            chmod('/var/spool/greyhole/mem', 0777);
        }

        if (!DB::acquireLock(ACTION_READ_SAMBA_POOL, 5)) {
            // Another thread is already processing the Samba Spool; we don't want multiple threads doing this in parallel!
            // Undo the Log::setAction() above before leaving, like the normal exit path does.
            Log::restorePreviousAction();
            return;
        }

        $new_tasks = 0;
        $last_line = FALSE;
        $act = FALSE;
        $close_tasks = array();

        while (TRUE) {
            $files = array();
            $last_filename = FALSE;
            // Each output line is "<mtime> <path>"
            exec('find -L /var/spool/greyhole -type f -printf "%T@ %p\n" | sort -n 2> /dev/null | head -n 10000', $files);
            if (count($files) == 0) {
                break;
            }

            if ($last_line === FALSE) {
                Log::debug("Processing Samba spool...");
            }

            // Sometimes, the modification timestamps of the spooled files (%T@ above) are the same!
            // This sorting function will ensure that writes are after open and before close tasks.
            usort($files, array(__CLASS__, 'compare_spool_entries'));

            foreach ($files as $file) {
                // Remove timestamp prefix from $file (%T@ above), to get the complete filename
                list(, $filename) = self::split_spool_entry($file);

                // The previous file is only deleted once we move on to the next one.
                if ($last_filename) {
                    unlink($last_filename);
                }
                $last_filename = $filename;

                $line = file_get_contents($filename);

                // Prevent insertion of unneeded duplicates
                if ($line === $last_line) {
                    continue;
                }

                $line_ar = explode("\n", $line);
                $last_line = $line;

                // Close & fwrite logs are only processed when no more duplicates are found, so we'll execute this now that a non-duplicate line was found.
                // Note: $act, $share, $fd and $fullpath still hold the values of the previous record here.
                if ($act === 'fwrite' || $act === 'close') {
                    /** @noinspection PhpUndefinedVariableInspection */
                    $db_spool->close_task($act, $share, $fd, @$fullpath, $close_tasks);
                }

                $line = $line_ar;
                $act = array_shift($line);
                $share = array_shift($line);

                if ($act == 'mkdir') {
                    // Just create the same folder on the 2 backup drives, to be able to get back empty folders, if we ever lose the LZ
                    if (!empty($line[1])) {
                        // two lines = basename\ndirname
                        $path = str_replace(get_share_landing_zone($share) . '/', '', $line[1] . "/" . $line[0]);
                    } else {
                        $path = $line[0];
                    }
                    $dir_fullpath = get_share_landing_zone($share) . "/" . $path;
                    Log::debug("Directory created: $share/$path");
                    foreach (Config::get(CONFIG_METASTORE_BACKUPS) as $metastore_backup_drive) {
                        $backup_drive = str_replace('/' . Metastores::METASTORE_BACKUP_DIR, '', $metastore_backup_drive);
                        if (StoragePool::is_pool_drive($backup_drive)) {
                            gh_mkdir("$backup_drive/$share/$path", $dir_fullpath);
                        }
                    }
                    FileHook::trigger(FileHook::EVENT_TYPE_MKDIR, $share, $path);
                    continue;
                }

                $result = array_pop($line);
                if (string_starts_with($result, 'failed')) {
                    Log::debug("Failed $act in $share/$line[0]. Skipping.");
                    // Forget the failed action: a failed fwrite/close must not trigger the deferred
                    // close_task() call above on the next record, since $fd and $fullpath were not
                    // updated for this record and still belong to an earlier one.
                    $act = FALSE;
                    continue;
                }

                unset($fullpath);
                unset($fullpath_target);
                unset($fd);

                switch ($act) {
                    case 'open':
                        $fullpath = array_shift($line);
                        $fd = array_shift($line);
                        if (!empty($line)) {
                            array_shift($line); // 'for writing'
                        }
                        if (!empty($line[0])) {
                            // 1st line above is just basename - not really useful; this line here is the full real path, i.e. what we need (minus the LZ prefix)
                            $fullpath = str_replace(get_share_landing_zone($share) . '/', '', array_shift($line));
                        }
                        $act = 'write';
                        break;
                    case 'rmdir':
                    case 'unlink':
                        $fullpath = array_shift($line);
                        break;
                    case 'rename':
                    case 'link':
                        $fullpath = array_shift($line);
                        $fullpath_target = array_shift($line);
                        break;
                    case 'fwrite':
                    case 'close':
                        $fd = array_shift($line);
                        if (!empty($line)) {
                            $fullpath = array_shift($line);
                        }
                        if (empty($fullpath)) {
                            $fullpath = NULL;
                        }
                        break;
                    default:
                        // Unknown action: ignore this record
                        $act = FALSE;
                }

                if ($act === FALSE) {
                    continue;
                }

                // Close & fwrite logs are only processed when no more duplicates are found, so we won't execute it just yet; we'll process it the next time we find a non-duplicate line.
                if ($act != 'close' && $act != 'fwrite') {
                    if (isset($fd) && $fd == -1) {
                        continue;
                    }
                    // On trash shares, only unlink and rmdir are spooled
                    if ($act != 'unlink' && $act != 'rmdir' && array_contains(ConfigHelper::$trash_share_names, $share)) {
                        continue;
                    }
                    $new_tasks++;
                    /** @noinspection PhpUndefinedVariableInspection */
                    $db_spool->insert($act, $share, @$fullpath, @$fullpath_target, @$fd);
                }
            }

            // Delete the last file of this batch
            if ($last_filename) {
                unlink($last_filename);
            }
        }

        // Close & fwrite logs are only processed when no more duplicates are found, so we'll execute this now that we're done parsing all spooled files.
        if ($act === 'fwrite' || $act === 'close') {
            /** @noinspection PhpUndefinedVariableInspection */
            $db_spool->close_task($act, $share, $fd, @$fullpath, $close_tasks);
        }

        Log::perf("Finished parsing spool.");

        // We also need to 'execute' all close tasks, now that all fwrite have been logged
        if (!empty($close_tasks)) {
            Log::perf("Found " . count($close_tasks) . " close tasks. Will finalize all write tasks for those, if any...");
            $db_spool->close_all_tasks($close_tasks);
        }

        if ($new_tasks > 0) {
            Log::debug("Found $new_tasks new tasks in spool.");
        }

        DB::releaseLock(ACTION_READ_SAMBA_POOL);
        Log::restorePreviousAction();
    }

    /**
     * Splits a "<timestamp> <path>" line, as printed by find's "%T@ %p" format,
     * into its two parts. The path may itself contain spaces.
     *
     * @param string $entry One line of find output.
     *
     * @return array array(timestamp, path); path is an empty string if $entry contains no space.
     */
    private static function split_spool_entry($entry) {
        return array_pad(explode(' ', $entry, 2), 2, '');
    }

    /**
     * usort() callback ordering two "<timestamp> <path>" spool entries.
     *
     * Entries are ordered by modification time (seconds, then fractional part).
     * When those are equal, the seconds found at the start of the file's basename
     * are compared. When still equal:
     *  - two files from /var/spool/greyhole/mem/ (write logs) are considered equal;
     *  - two files from outside that folder are ordered by the last 6 characters
     *    of their basename prefix (presumably microseconds - TODO confirm);
     *  - a write log vs. another log: the write goes after an 'open' and before a 'close'.
     *
     * @param string $entry1 First entry.
     * @param string $entry2 Second entry.
     *
     * @return int -1, 0 or 1, per the usort() comparator contract.
     */
    private static function compare_spool_entries($entry1, $entry2) {
        list($mtime1, $file1) = self::split_spool_entry($entry1);
        list($mtime2, $file2) = self::split_spool_entry($entry2);

        // Compare the integer and fractional parts of the modification times separately
        list($sec1, $frac1) = array_pad(explode('.', $mtime1), 2, '0');
        list($sec2, $frac2) = array_pad(explode('.', $mtime2), 2, '0');
        $sec1 = (int) $sec1;
        $frac1 = (int) $frac1;
        $sec2 = (int) $sec2;
        $frac2 = (int) $frac2;
        if ($sec1 != $sec2) {
            return $sec1 < $sec2 ? -1 : 1;
        }
        if ($frac1 != $frac2) {
            return $frac1 < $frac2 ? -1 : 1;
        }

        // Same modification time: fall back on the timestamp embedded in the filenames
        $is_file1_write = string_starts_with($file1, '/var/spool/greyhole/mem/');
        $is_file2_write = string_starts_with($file2, '/var/spool/greyhole/mem/');

        $ts1 = explode('-', basename($file1))[0];
        $ts2 = explode('-', basename($file2))[0];
        $seconds1 = substr($ts1, 0, 10);
        $seconds2 = substr($ts2, 0, 10);
        $useconds1 = substr($ts1, -6);
        $useconds2 = substr($ts2, -6);

        if ($seconds1 < $seconds2) {
            return -1;
        }
        if ($seconds1 > $seconds2) {
            return 1;
        }

        if ($is_file1_write && $is_file2_write) {
            return 0;
        }

        if (!$is_file1_write && !$is_file2_write) {
            if ($useconds1 < $useconds2) {
                return -1;
            }
            if ($useconds1 > $useconds2) {
                return 1;
            }
            // A comparator needs to return 0 for equal elements, to be consistent.
            return 0;
        }

        // Exactly one of the two files is a write log; look at what the other one is.
        $other_file = $is_file1_write ? $file2 : $file1;
        $log = file_get_contents($other_file);
        if (string_starts_with($log, 'open')) {
            return $is_file1_write ? 1 : -1; // open before write
        }
        if (string_starts_with($log, 'close')) {
            return $is_file1_write ? -1 : 1; // close after write
        }
        return 0;
    }
}
?>