From e1227913b7884e34249705a169b7ba95bd72f003 Mon Sep 17 00:00:00 2001 From: Christian Aistleitner Date: Thu, 22 Mar 2012 10:10:31 +0100 Subject: [PATCH] Refactoring dumpTextPass's error handling Change-Id: I9c0af860ca97489222b3cec071ed721ce72ad9e5 --- maintenance/dumpTextPass.php | 242 +++++++++++++++++++++++++---------- 1 file changed, 171 insertions(+), 71 deletions(-) diff --git a/maintenance/dumpTextPass.php b/maintenance/dumpTextPass.php index c03f3df893..82dbaf794b 100644 --- a/maintenance/dumpTextPass.php +++ b/maintenance/dumpTextPass.php @@ -41,9 +41,7 @@ class TextPassDumper extends BackupDumper { var $prefetchCountLast = 0; var $fetchCountLast = 0; - var $failures = 0; var $maxFailures = 5; - var $failedTextRetrievals = 0; var $maxConsecutiveFailedTextRetrievals = 200; var $failureTimeout = 5; // Seconds to sleep after db failure @@ -71,6 +69,52 @@ class TextPassDumper extends BackupDumper { */ protected $db; + + /** + * Drop the database connection $this->db and try to get a new one. + * + * This function tries to get a /different/ connection if this is + * possible. Hence, (if this is possible) it switches to a different + * failover upon each call. + * + * This function resets $this->lb and closes all connections on it. + * + * @throws MWException + */ + function rotateDb() { + // Cleaning up old connections + if ( isset( $this->lb ) ) { + $this->lb->closeAll(); + unset( $this->lb ); + } + + if ( isset( $this->db ) && $this->db->isOpen() ) { + throw new MWException( 'DB is set and has not been closed by the Load Balancer' ); + } + + unset( $this->db ); + + // Trying to set up new connection. + // We do /not/ retry upon failure, but delegate to encapsulating logic, to avoid + // individually retrying at different layers of code. + + // 1. The LoadBalancer. + try { + $this->lb = wfGetLBFactory()->newMainLB(); + } catch (Exception $e) { + throw new MWException( __METHOD__ . " rotating DB failed to obtain new load balancer (" . $e->getMessage() . ")" ); + } + + + // 2. The Connection, through the load balancer. + try { + $this->db = $this->lb->getConnection( DB_SLAVE, 'backup' ); + } catch (Exception $e) { + throw new MWException( __METHOD__ . " rotating DB failed to obtain new database (" . $e->getMessage() . ")" ); + } + } + + function initProgress( $history ) { parent::initProgress(); $this->timeOfCheckpoint = $this->startTime; @@ -87,7 +131,19 @@ class TextPassDumper extends BackupDumper { $this->initProgress( $this->history ); - $this->db = $this->backupDb(); + // We are trying to get an initial database connection to avoid that the + // first try of this request's first call to getText fails. However, if + // obtaining a good DB connection fails it's not a serious issue, as + // getText does retry upon failure and can start without having a working + // DB connection. + try { + $this->rotateDb(); + } catch (Exception $e) { + // We do not even count this as failure. Just let eventual + // watchdogs know. + $this->progress( "Getting initial DB connection failed (" . + $e->getMessage() . ")" ); + } $this->egress = new ExportProgressFilter( $this->sink, $this ); @@ -316,98 +372,142 @@ class TextPassDumper extends BackupDumper { return true; } + /** + * Tries to get the revision text for a revision id. + * + * Upon errors, retries (Up to $this->maxFailures tries each call). + * If still no good revision get could be found even after this retrying, "" is returned. + * If no good revision text could be returned for + * $this->maxConsecutiveFailedTextRetrievals consecutive calls to getText, MWException + * is thrown. + * + * @param $id string The revision id to get the text for + * + * @return string The revision text for $id, or "" + * @throws MWException + */ function getText( $id ) { + $prefetchNotTried = true; // Whether or not we already tried to get the text via prefetch. + $text = false; // The candidate for a good text. false if no proper value. + $failures = 0; // The number of times, this invocation of getText already failed. + + static $consecutiveFailedTextRetrievals = 0; // The number of times getText failed without + // yielding a good text in between. + $this->fetchCount++; - if ( isset( $this->prefetch ) ) { - $text = $this->prefetch->prefetch( $this->thisPage, $this->thisRev ); - if ( $text !== null ) { // Entry missing from prefetch dump - $dbr = wfGetDB( DB_SLAVE ); + + // To allow to simply return on success and do not have to worry about book keeping, + // we assume, this fetch works (possible after some retries). Nevertheless, we koop + // the old value, so we can restore it, if problems occur (See after the while loop). + $oldConsecutiveFailedTextRetrievals = $consecutiveFailedTextRetrievals; + $consecutiveFailedTextRetrievals = 0; + + while ( $failures < $this->maxFailures ) { + + // As soon as we found a good text for the $id, we will return immediately. + // Hence, if we make it past the try catch block, we know that we did not + // find a good text. + + try { + // Step 1: Get some text (or reuse from previous iteratuon if checking + // for plausibility failed) + + // Trying to get prefetch, if it has not been tried before + if ( $text === false && isset( $this->prefetch ) && $prefetchNotTried ) { + $prefetchNotTried = false; + $tryIsPrefetch = true; + $text = $this->prefetch->prefetch( $this->thisPage, $this->thisRev ); + if ( $text === null ) { + $text = false; + } + } + + if ( $text === false ) { + // Fallback to asking the database + $tryIsPrefetch = false; + if ( $this->spawn ) { + $text = $this->getTextSpawned( $id ); + } else { + $text = $this->getTextDb( $id ); + } + } + + if ( $text === false ) { + throw new MWException( "Generic error while obtaining text for id " . $id ); + } + + // We received a good candidate for the text of $id via some method + + // Step 2: Checking for plausibility and return the text if it is + // plausible $revID = intval( $this->thisRev ); - $revLength = $dbr->selectField( 'revision', 'rev_len', array( 'rev_id' => $revID ) ); - // if length of rev text in file doesn't match length in db, we reload - // this avoids carrying forward broken data from previous xml dumps + if ( ! isset( $this->db ) ) { + throw new MWException( "No database available" ); + } + $revLength = $this->db->selectField( 'revision', 'rev_len', array( 'rev_id' => $revID ) ); if( strlen( $text ) == $revLength ) { - $this->prefetchCount++; + if ( $tryIsPrefetch ) { + $this->prefetchCount++; + } return $text; } + + $text = false; + throw new MWException( "Received text is unplausible for id " . $id ); + + } catch (Exception $e) { + $msg = "getting/checking text " . $id . " failed (".$e->getMessage().")"; + if ( $failures + 1 < $this->maxFailures ) { + $msg .= " (Will retry " . ( $this->maxFailures - $failures - 1) . " more times)"; + } + $this->progress( $msg ); } - } - return $this->doGetText( $id ); - } + + // Something went wrong; we did not a text that was plausible :( + $failures++; + - private function doGetText( $id ) { - $id = intval( $id ); - $this->failures = 0; - $ex = new MWException( "Graceful storage failure" ); - while (true) { - if ( $this->spawn ) { - if ($this->failures) { - // we don't know why it failed, could be the child process - // borked, could be db entry busted, could be db server out to lunch, - // so cover all bases + // After backing off for some time, we try to reboot the whole process as + // much as possible to not carry over failures from one part to the other + // parts + sleep( $this->failureTimeout ); + try { + $this->rotateDb(); + if ( $this->spawn ) { $this->closeSpawn(); $this->openSpawn(); } - $text = $this->getTextSpawned( $id ); - } else { - $text = $this->getTextDbSafe( $id ); - } - if ( $text === false ) { - $this->failures++; - if ( $this->failures > $this->maxFailures) { - $this->progress( "Failed to retrieve revision text for text id ". - "$id after $this->maxFailures tries, giving up" ); - // were there so many bad retrievals in a row we want to bail? - // at some point we have to declare the dump irretrievably broken - $this->failedTextRetrievals++; - if ($this->failedTextRetrievals > $this->maxConsecutiveFailedTextRetrievals) { - throw $ex; - } else { - // would be nice to return something better to the caller someday, - // log what we know about the failure and about the revision - return ""; - } - } else { - $this->progress( "Error $this->failures " . - "of allowed $this->maxFailures retrieving revision text for text id $id! " . - "Pausing $this->failureTimeout seconds before retry..." ); - sleep( $this->failureTimeout ); - } - } else { - $this->failedTextRetrievals= 0; - return $text; + } catch (Exception $e) { + $this->progress( "Rebooting getText infrastructure failed (".$e->getMessage().")" . + " Trying to continue anyways" ); } } - return ''; - } - /** - * Fetch a text revision from the database, retrying in case of failure. - * This may survive some transitory errors by reconnecting, but - * may not survive a long-term server outage. - * - * FIXME: WTF? Why is it using a loop and then returning unconditionally? - * @param $id int - * @return bool|string - */ - private function getTextDbSafe( $id ) { - while ( true ) { - try { - $text = $this->getTextDb( $id ); - } catch ( DBQueryError $ex ) { - $text = false; - } - return $text; + // Retirieving a good text for $id failed (at least) maxFailures times. + // We abort for this $id. + + // Restoring the consecutive failures, and maybe aborting, if the dump + // is too broken. + $consecutiveFailedTextRetrievals = $oldConsecutiveFailedTextRetrievals + 1; + if ( $consecutiveFailedTextRetrievals > $this->maxConsecutiveFailedTextRetrievals ) { + throw new MWException( "Graceful storage failure" ); } + + return ""; } + /** * May throw a database error if, say, the server dies during query. * @param $id * @return bool|string + * @throws MWException */ private function getTextDb( $id ) { global $wgContLang; + if ( ! isset( $this->db ) ) { + throw new MWException( __METHOD__ . "No database available" ); + } $row = $this->db->selectRow( 'text', array( 'old_text', 'old_flags' ), array( 'old_id' => $id ), -- 2.20.1