includes/libs/rdbms/TransactionProfiler.php
<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
namespace Wikimedia\Rdbms;
use Psr\Log\LoggerAwareInterface;
use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger;
use RuntimeException;
use Wikimedia\ScopedCallback;
use Wikimedia\Stats\StatsFactory;
/**
* Detect high-contention DB queries via profiling calls.
*
* This class is meant to work with an IDatabase object, which manages queries.
*
* @internal For use by Database only
* @since 1.24
* @ingroup Profiler
* @ingroup Database
*/
class TransactionProfiler implements LoggerAwareInterface {
/** @var LoggerInterface */
private $logger;
/** @var StatsFactory */
private $statsFactory;
/** @var array<string,array> Map of (event name => map of FLD_* class constants) */
private $expect;
/** @var array<string,int> Map of (event name => current hits) */
private $hits;
/** @var array<string,int> Map of (event name => violation counter) */
private $violations;
/** @var array<string,int> Map of (event name => silence counter) */
private $silenced;
/**
* @var array<string,array> Map of (trx ID => (write start time, list of DBs involved))
* @phan-var array<string,array{start:float,conns:array<string,int>}>
*/
private $dbTrxHoldingLocks;
/**
* @var array[][] Map of (trx ID => list of (query name, start time, end time))
* @phan-var array<string,array<int,array{0:string,1:float,2:float}>>
*/
private $dbTrxMethodTimes;
/** @var string|null HTTP request method; null for CLI mode */
private $method;
/** @var float|null */
private $wallClockOverride;
/** Treat locks as long-running if they last longer than this many seconds */
private const DB_LOCK_THRESHOLD_SEC = 3.0;
/** Include events in any violation logs if they last longer than this many seconds */
private const EVENT_THRESHOLD_SEC = 0.25;
/** List of event names */
private const EVENT_NAMES = [
'writes',
'queries',
'conns',
'masterConns',
'maxAffected',
'readQueryRows',
'readQueryTime',
'writeQueryTime'
];
/** List of event names with hit counters */
private const COUNTER_EVENT_NAMES = [
'writes',
'queries',
'conns',
'masterConns'
];
/** Key to max expected value */
private const FLD_LIMIT = 0;
/** Key to the function that set the max expected value */
private const FLD_FNAME = 1;
/** Any type of expectation */
public const EXPECTATION_ANY = 'any';
/** Only expectations that no writes or primary connections occur (replica-only usage) */
public const EXPECTATION_REPLICAS_ONLY = 'replicas-only';
/**
 * Start with placeholder expectations, no tracked transactions, no silenced
 * events, a NullLogger, and the factory returned by StatsFactory::newNull().
 */
public function __construct() {
	// Collaborators default to placeholder implementations until injected
	$this->setLogger( new NullLogger() );
	$this->statsFactory = StatsFactory::newNull();
	// No event is silenced and no write transaction is being tracked yet
	$this->silenced = array_fill_keys( self::EVENT_NAMES, 0 );
	$this->dbTrxMethodTimes = [];
	$this->dbTrxHoldingLocks = [];
	// Every event starts with an infinite limit and zeroed counters
	$this->initPlaceholderExpectations();
}
/**
 * Set the logger that receives expectation violations and transaction warnings
 *
 * @param LoggerInterface $logger
 * @return void
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
/**
 * Inject the stats factory used to count expectation violations
 *
 * @param StatsFactory $statsFactory Replaces the factory set by the constructor
 * @return void
 */
public function setStatsFactory( StatsFactory $statsFactory ) {
	$this->statsFactory = $statsFactory;
}
/**
 * Record the HTTP method of the current request
 *
 * The value is used as a label on the violation counter; while it is null,
 * no violation counter is incremented.
 *
 * @param ?string $method HTTP method; null for CLI mode
 * @return void
 */
public function setRequestMethod( ?string $method ) {
	$this->method = $method;
}
/**
 * Temporarily ignore expectations until the returned object goes out of scope
 *
 * While silenced, violations of the affected expectations are not logged and
 * their hit counters (e.g. "conns") are not incremented.
 *
 * With EXPECTATION_REPLICAS_ONLY, only the "writes" and "masterConns" events are
 * considered, and only those whose current limit is exactly zero. The main use
 * case is to avoid warnings about primary connections/writes. Any other type
 * silences every event.
 *
 * Silencing is counted per event, so nested scopes stack.
 *
 * @param string $type Class EXPECTATION_* constant [default: TransactionProfiler::EXPECTATION_ANY]
 * @return ScopedCallback
 */
public function silenceForScope( string $type = self::EXPECTATION_ANY ) {
	$silencedEvents = self::EVENT_NAMES;
	if ( $type === self::EXPECTATION_REPLICAS_ONLY ) {
		// Narrow down to the primary-related events that are currently forbidden outright
		$silencedEvents = array_values( array_filter(
			[ 'writes', 'masterConns' ],
			function ( $name ) {
				return $this->expect[$name][self::FLD_LIMIT] === 0;
			}
		) );
	}

	foreach ( $silencedEvents as $name ) {
		$this->silenced[$name]++;
	}

	// Undo exactly the increments made above once the caller drops the handle
	return new ScopedCallback( function () use ( $silencedEvents ) {
		foreach ( $silencedEvents as $name ) {
			$this->silenced[$name]--;
		}
	} );
}
/**
 * Set a single performance expectation
 *
 * With conflicting expectations, the most narrow one wins: the new limit is
 * only stored when it is lower than or equal to the current one. An equal
 * limit therefore still replaces the recorded caller.
 *
 * Unknown event names are ignored.
 *
 * @param string $event Event name, {@see self::EVENT_NAMES}
 * @param float|int $limit Maximum event count, event value, or total event value
 * @param string $fname Caller
 * @since 1.25
 */
public function setExpectation( string $event, $limit, string $fname ) {
	if ( !isset( $this->expect[$event] ) ) {
		// Obsolete/bogus expectation
		return;
	}

	$isAtLeastAsStrict = ( $limit <= $this->expect[$event][self::FLD_LIMIT] );
	if ( $isAtLeastAsStrict ) {
		$this->expect[$event] = [
			self::FLD_LIMIT => $limit,
			self::FLD_FNAME => $fname
		];
	}
}
/**
 * Set one or multiple performance expectations
 *
 * Each entry is passed to setExpectation(), so with conflicting expectations
 * the most narrow ones will be used.
 *
 * Use this to initialize expectations or make them stricter mid-request
 *
 * @param array $expects Map of (event name => limit), {@see self::EVENT_NAMES}
 * @param string $fname Caller, recorded for every expectation that gets applied
 * @since 1.26
 */
public function setExpectations( array $expects, string $fname ) {
	foreach ( $expects as $eventName => $limit ) {
		$this->setExpectation( $eventName, $limit, $fname );
	}
}
/**
 * Reset all performance expectations, hit counters and violation counters
 *
 * Every event goes back to its placeholder (infinite) limit. Silence counters
 * are left untouched.
 *
 * Use this for unit testing or before applying a totally different set of expectations
 * for a different part of the request, such as during "post-send" (execution after HTTP
 * response completion)
 *
 * @since 1.25
 */
public function resetExpectations() {
	$this->initPlaceholderExpectations();
}
/**
 * Clear all expectations and counters, then set new performance expectations
 *
 * Use this to apply a totally different set of expectations for a different part
 * of the request, such as during "post-send" (execution after HTTP response completion)
 *
 * @param array $expects Map of (event name => limit), {@see self::EVENT_NAMES}
 * @param string $fname Caller, recorded for every expectation that gets applied
 * @since 1.33
 */
public function redefineExpectations( array $expects, string $fname ) {
	// Start over from the placeholder limits so that looser limits can be applied
	$this->initPlaceholderExpectations();
	$this->setExpectations( $expects, $fname );
}
/**
 * Mark a DB as having been connected to with a new handle
 *
 * Counts the connection against the "conns" expectation and, for a primary
 * server that has replicas, also against "masterConns". A violation is
 * reported for each expectation that is exceeded.
 *
 * Note that there can be multiple connections to a single DB.
 *
 * @param string $server DB server
 * @param string|null $db DB name
 * @param bool $isPrimaryWithReplicas If the server is the primary and there are replicas
 */
public function recordConnection( $server, $db, bool $isPrimaryWithReplicas ) {
	// Map of (event name => whether this connection counts towards it), in check order
	$applicableEvents = [
		'conns' => true,
		'masterConns' => $isPrimaryWithReplicas
	];

	foreach ( $applicableEvents as $event => $applies ) {
		// Report when too many (primary) connections happen...
		if ( $applies && $this->pingAndCheckThreshold( $event ) ) {
			$this->reportExpectationViolated(
				$event,
				"[connect to $server ($db)]",
				$this->hits[$event]
			);
		}
	}
}
/**
 * Mark a DB as in a transaction with one or more writes pending
 *
 * Registers the transaction under a name built from the DB name, server and
 * transaction ID, and starts an empty list of query timings for it. If a
 * transaction with the same name is already registered, a warning is logged
 * and its state is replaced.
 *
 * Note that there can be multiple connections to a single DB.
 *
 * @param string $server DB server
 * @param string|null $db DB name
 * @param string $id ID string of transaction
 * @param float $startTime UNIX timestamp
 */
public function transactionWritingIn( $server, $db, string $id, float $startTime ) {
	// Must match $name in transactionWritingOut()
	$name = "{$db} {$server} TRX#$id";
	if ( isset( $this->dbTrxHoldingLocks[$name] ) ) {
		$this->logger->warning( "Nested transaction for '$name' - out of sync." );
	}

	$this->dbTrxHoldingLocks[$name] = [
		'start' => $startTime,
		'conns' => [], // all connections involved
	];
	$this->dbTrxMethodTimes[$name] = [];

	// Iterate over the keys instead of binding each entry by reference: this leaves
	// no dangling reference behind and does not overwrite $name with the loop key.
	// NOTE(review): each transaction only ever records its own name here, exactly as
	// before; confirm whether cross-recording the other open transactions was intended.
	foreach ( array_keys( $this->dbTrxHoldingLocks ) as $trxName ) {
		$this->dbTrxHoldingLocks[$trxName]['conns'][$trxName] = 1;
	}
}
/**
 * Register the name and time of a method for slow DB trx detection
 *
 * The query is first checked against the row count, query count, write count
 * and query time expectations. It is then appended to the timeline of every
 * transaction currently holding locks, unless it is a read query that took
 * less than EVENT_THRESHOLD_SEC.
 *
 * This assumes that all queries are synchronous (non-overlapping)
 *
 * @param string|GeneralizedSql|Query $query Function name or generalized SQL
 * @param float $sTime Starting UNIX wall time
 * @param bool $isWrite Whether this is a write query
 * @param int|null $rowCount Number of affected/read rows
 * @param string $trxId Transaction id
 * @param string|null $serverName db host name like db1234
 */
public function recordQueryCompletion(
	$query,
	float $sTime,
	bool $isWrite,
	?int $rowCount,
	string $trxId,
	?string $serverName = null
) {
	$eTime = $this->getCurrentTime();
	$elapsed = ( $eTime - $sTime );

	// Report when too many rows are affected/read...
	$rowsEvent = $isWrite ? 'maxAffected' : 'readQueryRows';
	if ( $this->isAboveThreshold( $rowCount, $rowsEvent ) ) {
		$this->reportExpectationViolated( $rowsEvent, $query, $rowCount, $trxId, $serverName );
	}

	// Report when too many writes/queries happen...
	if ( $this->pingAndCheckThreshold( 'queries' ) ) {
		$this->reportExpectationViolated( 'queries', $query, $this->hits['queries'], $trxId, $serverName );
	}
	if ( $isWrite && $this->pingAndCheckThreshold( 'writes' ) ) {
		$this->reportExpectationViolated( 'writes', $query, $this->hits['writes'], $trxId, $serverName );
	}

	// Report slow queries...
	$timeEvent = $isWrite ? 'writeQueryTime' : 'readQueryTime';
	if ( $this->isAboveThreshold( $elapsed, $timeEvent ) ) {
		$this->reportExpectationViolated( $timeEvent, $query, $elapsed, $trxId, $serverName );
	}

	if ( !$this->dbTrxHoldingLocks ) {
		// Short-circuit; no transaction timeline to extend
		return;
	}
	if ( !$isWrite && $elapsed < self::EVENT_THRESHOLD_SEC ) {
		// Not an important query nor slow enough
		return;
	}

	foreach ( $this->dbTrxHoldingLocks as $trxName => $trxInfo ) {
		$previous = end( $this->dbTrxMethodTimes[$trxName] );
		// Additional queries must start after the previous entry ended;
		// the first query must start after the transaction did
		$earliestStart = $previous ? $previous[2] : $trxInfo['start'];
		if ( !( $sTime >= $earliestStart ) ) {
			continue;
		}
		if ( $previous && ( $sTime - $earliestStart ) > self::EVENT_THRESHOLD_SEC ) {
			// Add an entry representing the time spent doing non-queries
			$this->dbTrxMethodTimes[$trxName][] = [ '...delay...', $earliestStart, $sTime ];
		}
		$this->dbTrxMethodTimes[$trxName][] = [ $query, $sTime, $eTime ];
	}
}
/**
 * Mark a DB as no longer in a transaction
 *
 * This checks the transaction against the "writeQueryTime" and "maxAffected"
 * expectations. It then logs a warning with a timing trace if the write time
 * was excessive, or if any query or idle period lasted at least
 * DB_LOCK_THRESHOLD_SEC. The tracking state of the transaction is discarded
 * afterwards.
 *
 * Note that there can be multiple connections to a single DB.
 *
 * @param string $server DB server
 * @param string|null $db DB name
 * @param string $id ID string of transaction
 * @param float $writeTime Time spent in write queries
 * @param int $affected Number of rows affected by writes
 */
public function transactionWritingOut(
	$server,
	$db,
	string $id,
	float $writeTime,
	int $affected
) {
	// Must match $name in transactionWritingIn()
	$name = "{$db} {$server} TRX#$id";
	if ( !isset( $this->dbTrxMethodTimes[$name] ) ) {
		$this->logger->warning( "Detected no transaction for '$name' - out of sync." );
		return;
	}

	$subject = "[transaction writes to {$db} at {$server}]";

	// Warn if too much time was spent writing...
	$tooMuchWriteTime = $this->isAboveThreshold( $writeTime, 'writeQueryTime' );
	if ( $tooMuchWriteTime ) {
		$this->reportExpectationViolated( 'writeQueryTime', $subject, $writeTime, $id );
	}
	// Warn if too many rows were changed...
	if ( $this->isAboveThreshold( $affected, 'maxAffected' ) ) {
		$this->reportExpectationViolated( 'maxAffected', $subject, $affected, $id );
	}

	// Fill in the last non-query period...
	$finalEntry = end( $this->dbTrxMethodTimes[$name] );
	if ( $finalEntry ) {
		$now = $this->getCurrentTime();
		$finalEnd = $finalEntry[2];
		if ( ( $now - $finalEnd ) > self::EVENT_THRESHOLD_SEC ) {
			$this->dbTrxMethodTimes[$name][] = [ '...delay...', $finalEnd, $now ];
		}
	}

	// Check for any slow queries or non-query periods...
	$isSlow = $tooMuchWriteTime;
	foreach ( $this->dbTrxMethodTimes[$name] as [ , $begin, $finish ] ) {
		if ( ( $finish - $begin ) >= self::DB_LOCK_THRESHOLD_SEC ) {
			$isSlow = true;
			break;
		}
	}

	if ( $isSlow ) {
		// One line per timeline entry: index, duration, generalized SQL
		$trace = '';
		foreach ( $this->dbTrxMethodTimes[$name] as $index => $entry ) {
			[ $sql, $begin, $finish ] = $entry;
			$trace .= sprintf(
				"%-2d %.3fs %s\n",
				$index,
				( $finish - $begin ),
				$this->getGeneralizedSql( $sql )
			);
		}
		$context = [
			'dbs' => implode( ', ', array_keys( $this->dbTrxHoldingLocks[$name]['conns'] ) ),
			'trace' => mb_substr( $trace, 0, 2000 )
		];
		$this->logger->warning( "Suboptimal transaction [{dbs}]:\n{trace}", $context );
	}

	unset( $this->dbTrxHoldingLocks[$name], $this->dbTrxMethodTimes[$name] );
}
/**
 * Give every event an infinite limit with no recorded caller, and zero all
 * hit counters and violation counters
 */
private function initPlaceholderExpectations() {
	$placeholder = [ self::FLD_LIMIT => INF, self::FLD_FNAME => null ];

	$this->expect = array_fill_keys( self::EVENT_NAMES, $placeholder );
	$this->violations = array_fill_keys( self::EVENT_NAMES, 0 );
	// Only a subset of the events keeps a running hit count
	$this->hits = array_fill_keys( self::COUNTER_EVENT_NAMES, 0 );
}
/**
 * Check whether a value exceeds the limit of an event that is not silenced
 *
 * @param float|int $value Measured value
 * @param string $event Event name
 * @return bool False while the event is silenced, or when the value is within the limit
 */
private function isAboveThreshold( $value, string $event ) {
	$isSilenced = ( $this->silenced[$event] > 0 );

	return !$isSilenced && $value > $this->expect[$event][self::FLD_LIMIT];
}
/**
 * Count one more hit of an event and check whether its limit is now exceeded
 *
 * While the event is silenced, the hit is not counted at all.
 *
 * @param string $event Event name with a hit counter
 * @return bool Whether the updated hit count is above the limit
 */
private function pingAndCheckThreshold( string $event ) {
	if ( $this->silenced[$event] > 0 ) {
		return false;
	}

	$this->hits[$event]++;

	return $this->hits[$event] > $this->expect[$event][self::FLD_LIMIT];
}
/**
 * Log a warning about an expectation that was not met
 *
 * The first violation of each event also increments a stats counter, but only
 * when a request method is set (i.e. not in CLI mode).
 *
 * @param string $event Event name
 * @param string|GeneralizedSql|Query $query Query or description of what violated the expectation
 * @param float|int $actual Measured value that exceeded the limit
 * @param string|null $trxId Transaction id
 * @param string|null $serverName db host name like db1234
 */
private function reportExpectationViolated(
	$event,
	$query,
	$actual,
	?string $trxId = null,
	?string $serverName = null
) {
	$this->violations[$event]++;
	$isFirstViolation = ( $this->violations[$event] === 1 );

	// Only count the first violation, and only for web requests
	if ( $isFirstViolation && $this->method !== null ) {
		$this->statsFactory->getCounter( 'rdbms_trxprofiler_warnings_total' )
			->setLabel( 'event', $event )
			->setLabel( 'method', $this->method )
			->copyToStatsdAt( "rdbms_trxprofiler_warnings.$event.{$this->method}" )
			->increment();
	}

	$max = $this->expect[$event][self::FLD_LIMIT];
	$by = $this->expect[$event][self::FLD_FNAME];

	// The transaction placeholder is only included when there is a transaction ID
	$trxSuffix = $trxId ? ' in trx #{trxId}' : '';
	$message = "Expectation ($event <= $max) by $by not met (actual: {actualSeconds})"
		. $trxSuffix
		. ":\n{query}\n";

	$context = [
		'db_log_category' => 'performance',
		'measure' => $event,
		'maxSeconds' => $max,
		'by' => $by,
		'actualSeconds' => $actual,
		'query' => $this->getGeneralizedSql( $query ),
		'exception' => new RuntimeException(),
		'trxId' => $trxId,
		// Avoid truncated JSON in Logstash (T349140)
		'fullQuery' => mb_substr( $this->getRawSql( $query ), 0, 2000 ),
		'dbHost' => $serverName
	];

	$this->logger->warning( $message, $context );
}
/**
 * Get the form of a query that is used in log messages and traces
 *
 * @param GeneralizedSql|string|Query $query
 * @return string Result of getCleanedSql() for a Query, of stringify() for a
 *  GeneralizedSql, or the value itself otherwise
 */
private function getGeneralizedSql( $query ) {
	if ( $query instanceof Query ) {
		return $query->getCleanedSql();
	}
	if ( $query instanceof GeneralizedSql ) {
		return $query->stringify();
	}

	return $query;
}
/**
 * Get the raw SQL of a query
 *
 * @param GeneralizedSql|string|Query $query
 * @return string Result of getSQL() for a Query, of getRawSql() for a
 *  GeneralizedSql, or the value itself otherwise
 */
private function getRawSql( $query ) {
	if ( $query instanceof Query ) {
		return $query->getSQL();
	}
	if ( $query instanceof GeneralizedSql ) {
		return $query->getRawSql();
	}

	return $query;
}
/**
 * Get the current time, preferring the mock time set via setMockTime()
 *
 * @return float UNIX timestamp
 * @codeCoverageIgnore
 */
private function getCurrentTime() {
	// Use a null check rather than a truthiness check, so that a mock
	// timestamp of 0 is honoured instead of falling back to the real clock
	return $this->wallClockOverride ?? microtime( true );
}
/**
 * Bind the clock of this profiler to a caller-owned variable
 *
 * The variable is bound by reference, so later changes made to it by the
 * caller are seen by getCurrentTime().
 *
 * @param float|null &$time Mock UNIX timestamp for testing
 * @codeCoverageIgnore
 */
public function setMockTime( &$time ) {
	$this->wallClockOverride =& $time;
}
}