includes/Tasks/BalanceTask.php
<?php
/*
Copyright 2009-2020 Guillaume Boudreau
This file is part of Greyhole.
Greyhole is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Greyhole is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Greyhole. If not, see <http://www.gnu.org/licenses/>.
*/
class BalanceTask extends AbstractTask {
private $skip_stickies = FALSE;
public function execute() {
Log::info("Starting available space balancing");
// Start with shares that have sticky files, so that subsequent shares will be used to try to balance what moving files into stick_into drives could debalance...
// Then start with the shares for which we keep the most # copies;
// That way, if the new drive fails soon, it won't take with it files for which we only have one copy!
$compare_share_balance = function ($a, $b) {
if (static::is_share_sticky($a['name']) && !static::is_share_sticky($b['name'])) {
return -1;
}
if (!static::is_share_sticky($a['name']) && static::is_share_sticky($b['name'])) {
return 1;
}
if ($a[CONFIG_NUM_COPIES] != $b[CONFIG_NUM_COPIES]) {
return $a[CONFIG_NUM_COPIES] > $b[CONFIG_NUM_COPIES] ? -1 : 1;
}
// Randomize order of the shares that have the same num_copies, in order to not always work with the same shares first
return rand(0, 1) ? -1 : 1;
};
$sorted_shares_options = SharesConfig::getShares();
unset($sorted_shares_options[CONFIG_TRASH_SHARE]);
uasort($sorted_shares_options, $compare_share_balance);
Log::debug("┌ Will balance the shares in the following order: " . implode(", ", array_keys($sorted_shares_options)));
// Look for files larger than 100MB first, and balance using those.
// If that's not enough, continue balancing by using files 10-100MB, then 5-10MB, then 1-5MB. At that point, give up!
foreach ([100, 10, 5, 1] as $min_file_size) {
foreach ($sorted_shares_options as $share => $share_options) {
if ($share_options[CONFIG_NUM_COPIES] == count(Config::storagePoolDrives())) {
// Files are everywhere; won't be able to use that share to balance available space!
Log::debug("├ Skipping share $share; has num_copies = max");
continue;
}
if ($this->skip_stickies && static::is_share_sticky($share)) {
Log::debug("├ Skipping share $share; is sticky");
continue;
}
$this->balance_share($share, $share_options, $min_file_size);
}
}
Log::debug("└ Done balancing all shares");
if ($this->skip_stickies) {
// We skipped some stickies... Let's re-balance to move those, and continue balancing.
$arr = debug_backtrace();
if (count($arr) < 93) {
Log::debug("Some shares with sticky files were skipped. Balancing will now re-start to continue moving those sticky files as needed, and further balance. Recursion level = " . count($arr));
return $this->execute();
}
Log::info("Maximum number of consecutive balance reached. You'll need to re-execute --balance if you want to balance further.");
}
Log::info("Available space balancing completed.");
return TRUE;
}
private function balance_share($share, $share_options, $min_file_size) {
Log::debug("├┐ Balancing share: $share ({$min_file_size}MB or + files)");
/** @var $drives_selectors PoolDriveSelector[] */
$drives_selectors = Config::get(CONFIG_DRIVE_SELECTION_ALGORITHM);
$all_drives = array();
foreach ($drives_selectors as $ds) {
$all_drives[] = $ds->drives;
}
$drives_selectors_share = SharesConfig::get($share, CONFIG_DRIVE_SELECTION_ALGORITHM);
$share_drives = array();
foreach ($drives_selectors_share as $ds) {
$share_drives[] = $ds->drives;
}
if ($share_drives !== $all_drives) {
Log::debug("├┘ Won't balance using share $share, because it uses a custom 'drive_selection_algorithm' config.");
return;
}
foreach ($drives_selectors as $ds) {
//Log::debug("│├ Drives to balance: " . implode(', ', $ds->drives));
// Move files from the drive with the less available space to the drive with the most available space.
$pool_drives_avail_space = StoragePool::get_drives_available_space();
$balance_direction_asc = array();
foreach ($pool_drives_avail_space as $drive => $available_space) {
if (!array_contains($ds->drives, $drive)) {
// Only work on the drives part of the current PoolDriveSelector
unset($pool_drives_avail_space[$drive]);
continue;
}
$target_avail_space = array_sum($pool_drives_avail_space) / count($pool_drives_avail_space);
$balance_direction_asc[$drive] = $pool_drives_avail_space[$drive] < $target_avail_space;
}
if (count($pool_drives_avail_space) > 0) {
// Balance the drives that needs the most first
$target_avail_space = array_sum($pool_drives_avail_space) / count($pool_drives_avail_space);
$sort_fct = function ($drive1, $drive2) use ($pool_drives_avail_space, $target_avail_space) {
$delta_needed1 = $target_avail_space - $pool_drives_avail_space[$drive1];
$delta_needed2 = $target_avail_space - $pool_drives_avail_space[$drive2];
return $delta_needed1 > $delta_needed2 ? -1 : 1;
};
uksort($pool_drives_avail_space, $sort_fct);
Log::debug("│├ Will work on the storage pool drives in the following order: " . implode(", ", array_keys($pool_drives_avail_space)));
}
foreach ($pool_drives_avail_space as $source_drive => $current_avail_space) {
$this->balance_drive($share, $share_options, $source_drive, $pool_drives_avail_space, $balance_direction_asc, $min_file_size);
}
}
Log::debug("├┘ Done balancing share: $share ({$min_file_size}MB or + files)");
}
private function balance_drive($share, $share_options, $source_drive, &$pool_drives_avail_space, $balance_direction_asc, $min_file_size) {
$current_avail_space = $pool_drives_avail_space[$source_drive];
$target_avail_space = array_sum($pool_drives_avail_space) / count($pool_drives_avail_space);
$delta_needed = $target_avail_space - $current_avail_space;
if ($delta_needed <= 10*1024 || $delta_needed < $min_file_size*1024) {
Log::debug("│├ Skipping balancing storage pool drive: $source_drive; it has enough available space: (". bytes_to_human($current_avail_space*1024, FALSE) ." available, target: ". bytes_to_human($target_avail_space*1024, FALSE) .")");
return;
}
Log::debug("│├┐ Balancing storage pool drive: $source_drive (". bytes_to_human($current_avail_space*1024, FALSE) ." available, target: ". bytes_to_human($target_avail_space*1024, FALSE) .")");
// Files candidate to get moved
$files = array();
if (is_dir("$source_drive/$share")) {
$max_file_size = floor($delta_needed / 1024);
$command = "find " . escapeshellarg("$source_drive/$share") . " -type f -size +{$min_file_size}M -size -{$max_file_size}M";
//Log::debug("│││ Looking for files to move on $source_drive/$share, with file size between {$min_file_size}-{$max_file_size} MB ...");
exec($command, $files);
}
if (count($files) == 0) {
Log::debug("│├┘ Found no files that could be moved.");
return;
}
Log::debug("│││ Found ". count($files) ." files that can be moved.");
// Repeat until all drives' available space is balanced
$file_too_large_warnings = 0;
foreach ($files as $file) {
if (!$this->balance_file($file, $share, $share_options, $source_drive, $pool_drives_avail_space, $balance_direction_asc, $file_too_large_warnings)) {
break;
}
}
$delta_needed = $target_avail_space - $current_avail_space;
if ($delta_needed > 50*1024) {
Log::debug("│├┘ Balancing storage pool drive $source_drive finished before enough space was made available: (" . bytes_to_human($current_avail_space * 1024, FALSE) . " available, target: ". bytes_to_human($target_avail_space*1024, FALSE) .")");
}
}
private function balance_file($file, $share, $share_options, $source_drive, &$pool_drives_avail_space, $balance_direction_asc, &$file_too_large_warnings) {
$num_total_drives = count($pool_drives_avail_space);
$current_avail_space = $pool_drives_avail_space[$source_drive];
$target_avail_space = array_sum($pool_drives_avail_space) / count($pool_drives_avail_space);
$delta_needed = $target_avail_space - $current_avail_space;
if ($delta_needed <= 50*1024) {
Log::debug("│├┘ Storage pool drive $source_drive now has enough available space: (". bytes_to_human($current_avail_space*1024, FALSE) ." available, target: ". bytes_to_human($target_avail_space*1024, FALSE) .")");
return FALSE;
}
// Let's not try to move locked files!
if (gh_is_file_locked($file) !== FALSE) {
Log::debug("││├ File $file is locked by another process. Skipping.");
return TRUE;
}
$filesize = gh_filesize($file)/1024; // KB
if ($filesize > $delta_needed) {
// 20x "File is too large" in a row = stop trying to balance $source_drive/$share (return FALSE)
if (++$file_too_large_warnings >= 20) {
Log::debug("││├ $file_too_large_warnings files too large in a row. Stopping balance of $source_drive/$share");
return FALSE;
}
Log::debug("││├ File is too large (" . bytes_to_human($filesize*1024, FALSE) . "). Skipping.");
return TRUE;
}
$file_too_large_warnings = 0;
$full_path = mb_substr($file, mb_strlen("$source_drive/$share/"));
list($path, ) = explode_full_path($full_path);
Log::debug("││├┐ Working on file: $share/$full_path (". bytes_to_human($filesize*1024, FALSE) .")");
// $is_sticky is set in choose_target_drives(), based on $share & $path
$target_drives = StoragePool::choose_target_drives($filesize, FALSE, $share, $path, ' ', $is_sticky);
unset($sp_drive);
if ($is_sticky) {
if (count($target_drives) == $num_total_drives - 1 && !array_contains($target_drives, $source_drive)) {
// Only drive full is the source drive. Let's move files away from there!
} else if (count($target_drives) < $num_total_drives) {
$this->skip_stickies = TRUE;
Log::debug("│├┴┘ Some drives are full. Skipping sticky shares until all drives have some free space.");
return FALSE;
}
$sticky_drives = array_slice($target_drives, 0, SharesConfig::getNumCopies($share));
if (array_contains($sticky_drives, $source_drive)) {
// Source drive is a stick_into drive; let's not move that file!
Log::debug("││├┘ Source is sticky. Skipping.");
return TRUE;
}
$already_stuck_copies = 0;
foreach ($sticky_drives as $drive) {
if (file_exists("$drive/$share/$full_path")) {
$already_stuck_copies++;
} else {
$sp_drive = $drive;
}
}
} else {
while (count($target_drives) > 0) {
$drive = array_shift($target_drives);
if (!file_exists("$drive/$share/$full_path") && array_contains(array_keys($pool_drives_avail_space), $drive)) {
$sp_drive = $drive;
break;
}
}
}
if (!isset($sp_drive)) {
// Can't find a drive that doesn't have this file; skipping.
if ($is_sticky) {
Log::debug("││├┘ Sticky file is already where it should be. Skipping.");
}
return TRUE;
}
Log::debug("││││ Target drive: $sp_drive (". bytes_to_human($pool_drives_avail_space[$sp_drive]*1024, FALSE) ." available)");
if ($is_sticky) {
Log::debug("││││ Moving sticky file, even if that means it won't help balancing available space.");
} else {
$new_drive_needs_more_avail_space = $balance_direction_asc[$sp_drive];
$new_drive_needs_less_avail_space = !$new_drive_needs_more_avail_space;
$new_drive_avail_space = $pool_drives_avail_space[$sp_drive];
if ($new_drive_needs_more_avail_space && $new_drive_avail_space <= $target_avail_space) {
Log::debug("││├┘ Target drive needs more available space; moving a file there would do the opposite. Skipping.");
return TRUE;
}
if ($new_drive_needs_less_avail_space && $new_drive_avail_space <= $target_avail_space) {
Log::debug("││├┘ Target drive needed less available space; is low enough now. Skipping.");
return TRUE;
}
}
// Make sure the parent directory exists, before we try moving something there...
$original_path = clean_dir("$source_drive/$share/$path");
list($target_path, $filename) = explode_full_path("$sp_drive/$share/$full_path");
gh_mkdir($target_path, $original_path);
// Move the file
$temp_path = StorageFile::get_temp_filename("$sp_drive/$share/$full_path");
$file_permissions = StorageFile::get_file_permissions($file);
Log::debug("││││ Moving file copy...");
$it_worked = gh_rename($file, $temp_path);
if ($it_worked) {
gh_rename($temp_path, "$sp_drive/$share/$full_path");
StorageFile::set_file_permissions("$sp_drive/$share/$full_path", $file_permissions);
$pool_drives_avail_space[$sp_drive] -= $filesize;
$pool_drives_avail_space[$source_drive] += $filesize;
} else {
Log::warn("││├┘ Failed file copy. Skipping.", Log::EVENT_CODE_FILE_COPY_FAILED);
@unlink($temp_path);
return TRUE;
}
// Update metafiles
foreach (Metastores::get_metafiles($share, $path, $filename, TRUE, TRUE, FALSE) as $existing_metafiles) {
foreach ($existing_metafiles as $key => $metafile) {
if ($metafile->path == $file) {
$metafile->path = "$sp_drive/$share/$full_path";
unset($existing_metafiles[$key]);
$metafile->state = Metafile::STATE_OK;
if ($metafile->is_linked) {
// Re-create correct symlink
$landing_zone = $share_options[CONFIG_LANDING_ZONE];
Log::debug("││││ Updating symlink at $landing_zone/$full_path to point to $metafile->path");
if (is_link("$landing_zone/$full_path")) {
Trash::trash_file("$landing_zone/$full_path");
}
// Creating this symlink can fail if the parent dir was removed
@gh_symlink($metafile->path, "$landing_zone/$full_path");
}
$existing_metafiles[$metafile->path] = $metafile;
Metastores::save_metafiles($share, $path, $filename, $existing_metafiles);
break;
}
}
}
$current_avail_space = $pool_drives_avail_space[$source_drive];
$target_avail_space = array_sum($pool_drives_avail_space) / count($pool_drives_avail_space);
Log::debug("││├┘ Balancing storage pool drive: $source_drive (". bytes_to_human($current_avail_space*1024, FALSE) ." available, target: ". bytes_to_human($target_avail_space*1024, FALSE) .")");
return TRUE;
}
private static function is_share_sticky($share_name) {
$sticky_files = Config::get(CONFIG_STICKY_FILES);
if (!empty($sticky_files)) {
foreach ($sticky_files as $share_dir => $stick_into) {
if (string_starts_with($share_dir, $share_name)) {
return TRUE;
}
}
}
return FALSE;
}
}
?>