production.
=== Configuration changes in 1.28 ===
+* BREAKING CHANGE: $wgHTTPProxy is now *required* for all external requests
+ made by MediaWiki via a proxy. Relying on the http_proxy environment
+ variable is no longer supported.
* The load.php entry point now enforces the existing policy of not allowing
access to session data, which includes the session user and the session
user's language. If such access is attempted, an exception will be thrown.
'ClassCollector' => __DIR__ . '/includes/utils/AutoloadGenerator.php',
'CleanupAncientTables' => __DIR__ . '/maintenance/cleanupAncientTables.php',
'CleanupBlocks' => __DIR__ . '/maintenance/cleanupBlocks.php',
+ 'CleanupEmptyCategories' => __DIR__ . '/maintenance/cleanupEmptyCategories.php',
'CleanupPreferences' => __DIR__ . '/maintenance/cleanupPreferences.php',
'CleanupRemovedModules' => __DIR__ . '/maintenance/cleanupRemovedModules.php',
'CleanupSpam' => __DIR__ . '/maintenance/cleanupSpam.php',
}
}
+ $options = []; // HTTP header options
+ if ( isset( $_SERVER['HTTP_RANGE'] ) ) {
+ $options['range'] = $_SERVER['HTTP_RANGE'];
+ }
+ if ( isset( $_SERVER['HTTP_IF_MODIFIED_SINCE'] ) ) {
+ $options['if-modified-since'] = $_SERVER['HTTP_IF_MODIFIED_SINCE'];
+ }
+
if ( $request->getCheck( 'download' ) ) {
$headers[] = 'Content-Disposition: attachment';
}
// Stream the requested file
wfDebugLog( 'img_auth', "Streaming `" . $filename . "`." );
- $repo->streamFile( $filename, $headers );
+ $repo->streamFile( $filename, $headers, $options );
}
/**
$this->mSubcats = 0;
$this->mFiles = 0;
+ # If the title exists, call refreshCounts to add a row for it.
+ if ( $this->mTitle->exists() ) {
+ DeferredUpdates::addCallableUpdate( [ $this, 'refreshCounts' ] );
+ }
+
return true;
} else {
return false; # Fail
[ 'LOCK IN SHARE MODE' ]
);
+ $shouldExist = $result->pages > 0 || $this->getTitle()->exists();
+
if ( $this->mID ) {
- # The category row already exists, so do a plain UPDATE instead
- # of INSERT...ON DUPLICATE KEY UPDATE to avoid creating a gap
- # in the cat_id sequence. The row may or may not be "affected".
- $dbw->update(
- 'category',
- [
- 'cat_pages' => $result->pages,
- 'cat_subcats' => $result->subcats,
- 'cat_files' => $result->files
- ],
- [ 'cat_title' => $this->mName ],
- __METHOD__
- );
- } else {
+ if ( $shouldExist ) {
+ # The category row already exists, so do a plain UPDATE instead
+ # of INSERT...ON DUPLICATE KEY UPDATE to avoid creating a gap
+ # in the cat_id sequence. The row may or may not be "affected".
+ $dbw->update(
+ 'category',
+ [
+ 'cat_pages' => $result->pages,
+ 'cat_subcats' => $result->subcats,
+ 'cat_files' => $result->files
+ ],
+ [ 'cat_title' => $this->mName ],
+ __METHOD__
+ );
+ } else {
+ # The category is empty and has no description page, delete it
+ $dbw->delete(
+ 'category',
+ [ 'cat_title' => $this->mName ],
+ __METHOD__
+ );
+ $this->mID = false;
+ }
+ } elseif ( $shouldExist ) {
+ # The category row doesn't exist but should, so create it. Use
+ # upsert in case of races.
$dbw->upsert(
'category',
[
],
__METHOD__
);
+ // @todo: Should we update $this->mID here? Or not since Category
+ // objects tend to be short lived enough to not matter?
}
$dbw->endAtomic( __METHOD__ );
}
/**
- * Gets the relevant proxy from $wgHTTPProxy/http_proxy (when set).
+ * Gets the relevant proxy from $wgHTTPProxy
*
* @return mixed The proxy address or an empty string if not set.
*/
return $wgHTTPProxy;
}
- $envHttpProxy = getenv( "http_proxy" );
- if ( $envHttpProxy ) {
- return $envHttpProxy;
- }
-
return "";
}
}
return;
}
- // Otherwise, fallback to $wgHTTPProxy/http_proxy (when set) if this is not a machine
+ // Otherwise, fallback to $wgHTTPProxy if this is not a machine
// local URL and proxies are not disabled
if ( Http::isLocalURL( $this->url ) || $this->noProxy ) {
$this->proxy = '';
* Functions related to the output of file content
*/
class StreamFile {
- const READY_STREAM = 1;
- const NOT_MODIFIED = 2;
+ // Do not send any HTTP headers unless requested by caller (e.g. body only)
+ const STREAM_HEADLESS = 1;
+ // Do not try to tear down any PHP output buffers
+ const STREAM_ALLOW_OB = 2;
/**
* Stream a file to the browser, adding all the headings and fun stuff.
* and Content-Disposition.
*
* @param string $fname Full name and path of the file to stream
- * @param array $headers Any additional headers to send
+ * @param array $headers Any additional headers to send if the file exists
* @param bool $sendErrors Send error messages if errors occur (like 404)
+ * @param array $optHeaders HTTP request header map (e.g. "range") (use lowercase keys)
+ * @param integer $flags Bitfield of STREAM_* constants
* @throws MWException
* @return bool Success
*/
- public static function stream( $fname, $headers = [], $sendErrors = true ) {
+ public static function stream(
+ $fname, $headers = [], $sendErrors = true, $optHeaders = [], $flags = 0
+ ) {
+ $section = new ProfileSection( __METHOD__ );
if ( FileBackend::isStoragePath( $fname ) ) { // sanity
throw new MWException( __FUNCTION__ . " given storage path '$fname'." );
}
- MediaWiki\suppressWarnings();
- $stat = stat( $fname );
- MediaWiki\restoreWarnings();
-
- $res = self::prepareForStream( $fname, $stat, $headers, $sendErrors );
- if ( $res == self::NOT_MODIFIED ) {
- $ok = true; // use client cache
- } elseif ( $res == self::READY_STREAM ) {
- $ok = readfile( $fname );
- } else {
- $ok = false; // failed
+ // Don't stream it out as text/html if there was a PHP error
+ if ( ( ( $flags & self::STREAM_HEADLESS ) == 0 || $headers ) && headers_sent() ) {
+ echo "Headers already sent, terminating.\n";
+ return false;
}
- return $ok;
- }
+ $headerFunc = ( $flags & self::STREAM_HEADLESS )
+ ? function ( $header ) {
+ // no-op
+ }
+ : function ( $header ) {
+ is_int( $header ) ? HttpStatus::header( $header ) : header( $header );
+ };
+
+ MediaWiki\suppressWarnings();
+ $info = stat( $fname );
+ MediaWiki\restoreWarnings();
- /**
- * Call this function used in preparation before streaming a file.
- * This function does the following:
- * (a) sends Last-Modified, Content-type, and Content-Disposition headers
- * (b) cancels any PHP output buffering and automatic gzipping of output
- * (c) sends Content-Length header based on HTTP_IF_MODIFIED_SINCE check
- *
- * @param string $path Storage path or file system path
- * @param array|bool $info File stat info with 'mtime' and 'size' fields
- * @param array $headers Additional headers to send
- * @param bool $sendErrors Send error messages if errors occur (like 404)
- * @return int|bool READY_STREAM, NOT_MODIFIED, or false on failure
- */
- public static function prepareForStream(
- $path, $info, $headers = [], $sendErrors = true
- ) {
if ( !is_array( $info ) ) {
if ( $sendErrors ) {
- HttpStatus::header( 404 );
- header( 'Cache-Control: no-cache' );
- header( 'Content-Type: text/html; charset=utf-8' );
- $encFile = htmlspecialchars( $path );
- $encScript = htmlspecialchars( $_SERVER['SCRIPT_NAME'] );
- echo "<html><body>
- <h1>File not found</h1>
- <p>Although this PHP script ($encScript) exists, the file requested for output
- ($encFile) does not.</p>
- </body></html>
- ";
+ self::send404Message( $fname, $flags );
}
return false;
}
- // Sent Last-Modified HTTP header for client-side caching
- header( 'Last-Modified: ' . wfTimestamp( TS_RFC2822, $info['mtime'] ) );
+ // Send Last-Modified HTTP header for client-side caching
+ $headerFunc( 'Last-Modified: ' . wfTimestamp( TS_RFC2822, $info['mtime'] ) );
- // Cancel output buffering and gzipping if set
- wfResetOutputBuffers();
+ if ( ( $flags & self::STREAM_ALLOW_OB ) == 0 ) {
+ // Cancel output buffering and gzipping if set
+ wfResetOutputBuffers();
+ }
- $type = self::contentTypeFromPath( $path );
+ $type = self::contentTypeFromPath( $fname );
if ( $type && $type != 'unknown/unknown' ) {
- header( "Content-type: $type" );
+ $headerFunc( "Content-type: $type" );
} else {
// Send a content type which is not known to Internet Explorer, to
// avoid triggering IE's content type detection. Sending a standard
// unknown content type here essentially gives IE license to apply
// whatever content type it likes.
- header( 'Content-type: application/x-wiki' );
+ $headerFunc( 'Content-type: application/x-wiki' );
}
- // Don't stream it out as text/html if there was a PHP error
- if ( headers_sent() ) {
- echo "Headers already sent, terminating.\n";
- return false;
+ // Don't send if client has up to date cache
+ if ( isset( $optHeaders['if-modified-since'] ) ) {
+ $modsince = preg_replace( '/;.*$/', '', $optHeaders['if-modified-since'] );
+ if ( wfTimestamp( TS_UNIX, $info['mtime'] ) <= strtotime( $modsince ) ) {
+ ini_set( 'zlib.output_compression', 0 );
+ $headerFunc( 304 );
+ return true; // ok
+ }
}
// Send additional headers
foreach ( $headers as $header ) {
- header( $header );
+ header( $header ); // always use header(); specifically requested
}
- // Don't send if client has up to date cache
- if ( !empty( $_SERVER['HTTP_IF_MODIFIED_SINCE'] ) ) {
- $modsince = preg_replace( '/;.*$/', '', $_SERVER['HTTP_IF_MODIFIED_SINCE'] );
- if ( wfTimestamp( TS_UNIX, $info['mtime'] ) <= strtotime( $modsince ) ) {
- ini_set( 'zlib.output_compression', 0 );
- HttpStatus::header( 304 );
- return self::NOT_MODIFIED; // ok
+ if ( isset( $optHeaders['range'] ) ) {
+ $range = self::parseRange( $optHeaders['range'], $info['size'] );
+ if ( is_array( $range ) ) {
+ $headerFunc( 206 );
+ $headerFunc( 'Content-Length: ' . $range[2] );
+ $headerFunc( "Content-Range: bytes {$range[0]}-{$range[1]}/{$info['size']}" );
+ } elseif ( $range === 'invalid' ) {
+ if ( $sendErrors ) {
+ $headerFunc( 416 );
+ $headerFunc( 'Cache-Control: no-cache' );
+ $headerFunc( 'Content-Type: text/html; charset=utf-8' );
+ $headerFunc( 'Content-Range: bytes */' . $info['size'] );
+ }
+ return false;
+ } else { // unsupported Range request (e.g. multiple ranges)
+ $range = null;
+ $headerFunc( 'Content-Length: ' . $info['size'] );
+ }
+ } else {
+ $range = null;
+ $headerFunc( 'Content-Length: ' . $info['size'] );
+ }
+
+		// Stream the body. A satisfiable Range request is copied in 8 KiB chunks
+		// so that only the requested window of the file is read and printed.
+		if ( is_array( $range ) ) {
+			$handle = fopen( $fname, 'rb' );
+			if ( !$handle ) {
+				return false;
+			}
+			// Seek to the first requested byte; fseek() returns 0 on success
+			$ok = ( fseek( $handle, $range[0] ) === 0 );
+			$remaining = $range[2];
+			while ( $ok && $remaining > 0 ) {
+				$data = fread( $handle, min( $remaining, 8 * 1024 ) );
+				if ( $data === false || $data === '' ) {
+					// Read error or premature EOF (e.g. the file shrank after stat())
+					$ok = false;
+				} else {
+					// Count the bytes actually read; fread() may return fewer than asked
+					$remaining -= strlen( $data );
+					print $data;
+				}
+			}
+			fclose( $handle ); // always release the handle, even on failure
+		} else {
+			$ok = ( readfile( $fname ) !== false ); // faster
+		}
- header( 'Content-Length: ' . $info['size'] );
+		// Report a failed seek/read to the caller instead of claiming success
+		return $ok;
+ }
+
+ /**
+ * Send out a standard 404 message for a file
+ *
+ * Unless STREAM_HEADLESS is set in $flags, a 404 status line plus
+ * Cache-Control and Content-Type headers are sent first. The HTML body is
+ * always echoed; the file name and the script name are both HTML-escaped
+ * before being embedded in it.
+ *
+ * @param string $fname Full name and path of the file to stream
+ * @param integer $flags Bitfield of STREAM_* constants
+ * @since 1.24
+ */
+ public static function send404Message( $fname, $flags = 0 ) {
+ // Headless callers only want the body, so skip the status line and headers
+ if ( ( $flags & self::STREAM_HEADLESS ) == 0 ) {
+ HttpStatus::header( 404 );
+ header( 'Cache-Control: no-cache' );
+ header( 'Content-Type: text/html; charset=utf-8' );
+ }
+ // Escape both values since they are interpolated into HTML below
+ $encFile = htmlspecialchars( $fname );
+ $encScript = htmlspecialchars( $_SERVER['SCRIPT_NAME'] );
+ echo "<!DOCTYPE html><html><body>
+ <h1>File not found</h1>
+ <p>Although this PHP script ($encScript) exists, the file requested for output
+ ($encFile) does not.</p>
+ </body></html>
+ ";
+ }
- return self::READY_STREAM; // ok
+	/**
+	 * Convert a Range header value to an absolute (start, end, length) tuple
+	 *
+	 * Only a single range of the form "bytes=<start>-<end>" is recognized, where
+	 * either bound may be omitted. "bytes=-N" selects the last N bytes of the
+	 * file; when N exceeds the file size the whole file is selected, as required
+	 * by RFC 7233 section 2.1. An end offset past EOF is clamped to the last byte.
+	 *
+	 * @param string $range Range header value
+	 * @param integer $size File size
+	 * @return array|string Integer tuple (start, end, length) on success; the string
+	 *   'invalid' if the range is well-formed but cannot be satisfied for a file of
+	 *   this size; the string 'unrecognized' if the header should be ignored and the
+	 *   whole file sent instead
+	 * @since 1.24
+	 */
+	public static function parseRange( $range, $size ) {
+		$m = [];
+		if ( !preg_match( '#^bytes=(\d*)-(\d*)$#', $range, $m ) ) {
+			return 'unrecognized'; // other units, multiple ranges, or garbage
+		}
+		list( , $start, $end ) = $m;
+		$last = $size - 1; // offset of the final byte of the file
+		if ( $start === '' && $end === '' ) {
+			$absRange = [ 0, $last ];
+		} elseif ( $start === '' ) {
+			// Suffix range: the last $end bytes. Clamp the start to 0 so that a
+			// suffix longer than the file yields the whole file, not an error.
+			$absRange = [ max( $size - (int)$end, 0 ), $last ];
+		} elseif ( $end === '' ) {
+			$absRange = [ (int)$start, $last ];
+		} else {
+			$absRange = [ (int)$start, (int)$end ];
+		}
+		if ( $absRange[0] >= 0 && $absRange[1] >= $absRange[0] ) {
+			if ( $absRange[0] < $size ) {
+				$absRange[1] = min( $absRange[1], $last ); // stop at EOF
+				$absRange[2] = $absRange[1] - $absRange[0] + 1;
+				return $absRange;
+			} elseif ( $absRange[0] == 0 && $size == 0 ) {
+				return 'unrecognized'; // the whole file should just be sent
+			}
+		}
+		// Start is past EOF, or end precedes start (this also covers "bytes=-0")
+		return 'invalid';
}
/**
// This handles the case when updates have to batched into several COMMITs.
$scopedLock = LinksUpdate::acquirePageLock( $this->mDb, $id );
+ $title = $this->page->getTitle();
+
// Delete restrictions for it
$this->mDb->delete( 'page_restrictions', [ 'pr_page' => $id ], __METHOD__ );
}
}
+ // Refresh the category table entry if it seems to have no pages. Check
+ // master for the most up-to-date cat_pages count.
+ if ( $title->getNamespace() === NS_CATEGORY ) {
+ $row = $this->mDb->selectRow(
+ 'category',
+ [ 'cat_id', 'cat_title', 'cat_pages', 'cat_subcats', 'cat_files' ],
+ [ 'cat_title' => $title->getDBkey(), 'cat_pages <= 0' ],
+ __METHOD__
+ );
+ if ( $row ) {
+ $cat = Category::newFromRow( $row, $title )->refreshCounts();
+ }
+ }
+
// If using cascading deletes, we can skip some explicit deletes
if ( !$this->mDb->cascadingDeletes() ) {
// Delete outgoing links
// If using cleanup triggers, we can skip some manual deletes
if ( !$this->mDb->cleanupTriggers() ) {
- $title = $this->page->getTitle();
// Find recentchanges entries to clean up...
$rcIdsForTitle = $this->mDb->selectFieldValues(
'recentchanges',
/**
* Stream the file at a storage path in the backend.
+ *
* If the file does not exists, an HTTP 404 error will be given.
* Appropriate HTTP headers (Status, Content-Type, Content-Length)
* will be sent if streaming began, while none will be sent otherwise.
* Implementations should flush the output buffer before sending data.
*
* @param array $params Parameters include:
- * - src : source storage path
- * - headers : list of additional HTTP headers to send on success
- * - latest : use the latest available data
+ * - src : source storage path
+ * - headers : list of additional HTTP headers to send if the file exists
+ * - options : HTTP request header map with lower case keys (since 1.28). Supports:
+ * range : format is "bytes=(\d*-\d*)"
+ * if-modified-since : format is an HTTP date
+ * - headless : only include the body (and headers from "headers") (since 1.28)
+ * - latest : use the latest available data
+ * - allowOB : preserve any output buffers (since 1.28)
* @return Status
*/
abstract public function streamFile( array $params );
$ps = Profiler::instance()->scopedProfileIn( __METHOD__ . "-{$this->name}" );
$status = Status::newGood();
- $info = $this->getFileStat( $params );
- if ( !$info ) { // let StreamFile handle the 404
- $status->fatal( 'backend-fail-notexists', $params['src'] );
- }
-
- // Set output buffer and HTTP headers for stream
- $extraHeaders = isset( $params['headers'] ) ? $params['headers'] : [];
- $res = StreamFile::prepareForStream( $params['src'], $info, $extraHeaders );
- if ( $res == StreamFile::NOT_MODIFIED ) {
- // do nothing; client cache is up to date
- } elseif ( $res == StreamFile::READY_STREAM ) {
- $status = $this->doStreamFile( $params );
- if ( !$status->isOK() ) {
- // Per bug 41113, nasty things can happen if bad cache entries get
- // stuck in cache. It's also possible that this error can come up
- // with simple race conditions. Clear out the stat cache to be safe.
- $this->clearCache( [ $params['src'] ] );
- $this->deleteFileCache( $params['src'] );
- trigger_error( "Bad stat cache or race condition for file {$params['src']}." );
- }
- } else {
+ // Always set some fields for subclass convenience
+ $params['options'] = isset( $params['options'] ) ? $params['options'] : [];
+ $params['headers'] = isset( $params['headers'] ) ? $params['headers'] : [];
+
+ // Don't stream it out as text/html if there was a PHP error
+ if ( ( empty( $params['headless'] ) || $params['headers'] ) && headers_sent() ) {
+ print "Headers already sent, terminating.\n";
$status->fatal( 'backend-fail-stream', $params['src'] );
+ return $status;
}
+ $status->merge( $this->doStreamFile( $params ) );
+
return $status;
}
protected function doStreamFile( array $params ) {
$status = Status::newGood();
+ $flags = 0;
+ $flags |= !empty( $params['headless'] ) ? StreamFile::STREAM_HEADLESS : 0;
+ $flags |= !empty( $params['allowOB'] ) ? StreamFile::STREAM_ALLOW_OB : 0;
+
$fsFile = $this->getLocalReference( $params );
- if ( !$fsFile ) {
- $status->fatal( 'backend-fail-stream', $params['src'] );
- } elseif ( !readfile( $fsFile->getPath() ) ) {
+
+ if ( $fsFile ) {
+ $res = StreamFile::stream( $fsFile->getPath(),
+ $params['headers'], true, $params['options'], $flags );
+ } else {
+ $res = false;
+ StreamFile::send404Message( $params['src'], $flags );
+ }
+
+ if ( !$res ) {
$status->fatal( 'backend-fail-stream', $params['src'] );
}
return $tmpFiles;
}
- protected function doStreamFile( array $params ) {
- $status = Status::newGood();
-
- $src = $this->resolveHashKey( $params['src'] );
- if ( $src === null || !isset( $this->files[$src] ) ) {
- $status->fatal( 'backend-fail-stream', $params['src'] );
-
- return $status;
- }
-
- print $this->files[$src]['data'];
-
- return $status;
- }
-
protected function doDirectoryExists( $container, $dir, array $params ) {
$prefix = rtrim( "$container/$dir", '/' ) . '/';
foreach ( $this->files as $path => $data ) {
protected function doStreamFile( array $params ) {
$status = Status::newGood();
+ $flags = !empty( $params['headless'] ) ? StreamFile::STREAM_HEADLESS : 0;
+
list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] );
if ( $srcRel === null ) {
+ StreamFile::send404Message( $params['src'], $flags );
$status->fatal( 'backend-fail-invalidpath', $params['src'] );
+
+ return $status;
}
$auth = $this->getAuthentication();
if ( !$auth || !is_array( $this->getContainerStat( $srcCont ) ) ) {
+ StreamFile::send404Message( $params['src'], $flags );
$status->fatal( 'backend-fail-stream', $params['src'] );
return $status;
}
- $handle = fopen( 'php://output', 'wb' );
+ // If "headers" is set, we only want to send them if the file is there.
+ // Do not bother checking if the file exists if headers are not set though.
+ if ( $params['headers'] && !$this->fileExists( $params ) ) {
+ StreamFile::send404Message( $params['src'], $flags );
+ $status->fatal( 'backend-fail-stream', $params['src'] );
+ return $status;
+ }
+
+ // Send the requested additional headers
+ foreach ( $params['headers'] as $header ) {
+ header( $header ); // always send, even when 'headless' is set
+ }
+
+ if ( empty( $params['allowOB'] ) ) {
+ // Cancel output buffering and gzipping if set
+ wfResetOutputBuffers();
+ }
+
+ $handle = fopen( 'php://output', 'wb' );
list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $this->http->run( [
'method' => 'GET',
'url' => $this->storageUrl( $auth, $srcCont, $srcRel ),
'headers' => $this->authTokenHeaders( $auth )
- + $this->headersFromParams( $params ),
+ + $this->headersFromParams( $params ) + $params['options'],
'stream' => $handle,
+ 'flags' => [ 'relayResponseHeaders' => empty( $params['headless'] ) ]
] );
if ( $rcode >= 200 && $rcode <= 299 ) {
// good
} elseif ( $rcode === 404 ) {
$status->fatal( 'backend-fail-stream', $params['src'] );
+ // Per bug 41113, nasty things can happen if bad cache entries get
+ // stuck in cache. It's also possible that this error can come up
+ // with simple race conditions. Clear out the stat cache to be safe.
+ $this->clearCache( [ $params['src'] ] );
+ $this->deleteFileCache( $params['src'] );
} else {
$this->onError( $status, __METHOD__, $params, $rerr, $rcode, $rdesc );
}
*
* @param string $virtualUrl
* @param array $headers Additional HTTP headers to send on success
+ * @param array $optHeaders HTTP request headers (if-modified-since, range, ...)
* @return Status
* @since 1.27
*/
- public function streamFileWithStatus( $virtualUrl, $headers = [] ) {
+ public function streamFileWithStatus( $virtualUrl, $headers = [], $optHeaders = [] ) {
$path = $this->resolveToStoragePath( $virtualUrl );
- $params = [ 'src' => $path, 'headers' => $headers ];
+ $params = [ 'src' => $path, 'headers' => $headers, 'options' => $optHeaders ];
return $this->backend->streamFile( $params );
}
PopulateFilearchiveSha1::class,
PopulateBacklinkNamespace::class,
FixDefaultJsonContentPages::class,
+ CleanupEmptyCategories::class,
];
/**
* use application/x-www-form-urlencoded (headers sent automatically)
* - stream : resource to stream the HTTP response body to
* - proxy : HTTP proxy to use
+ * - flags : map of boolean flags which supports:
+ * - relayResponseHeaders : write out header via header()
* Request maps can use integer index 0 instead of 'method' and 1 instead of 'url'.
*
* @author Aaron Schulz
$req['body'] = '';
$req['headers']['content-length'] = 0;
}
+ $req['flags'] = isset( $req['flags'] ) ? $req['flags'] : [];
$handles[$index] = $this->getCurlHandle( $req, $opts );
if ( count( $reqs ) > 1 ) {
// https://github.com/guzzle/guzzle/issues/349
curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
function ( $ch, $header ) use ( &$req ) {
+ if ( !empty( $req['flags']['relayResponseHeaders'] ) ) {
+ header( $header );
+ }
$length = strlen( $header );
$matches = [];
if ( preg_match( "/^(HTTP\/1\.[01]) (\d{3}) (.*)/", $header, $matches ) ) {
$title->touchLinks();
$title->purgeSquid();
$title->deleteTitleProtection();
+
+ if ( $title->getNamespace() == NS_CATEGORY ) {
+ // Load the Category object, which will schedule a job to create
+ // the category table row if necessary. Checking a slave is ok
+ // here, in the worst case it'll run an unnecessary recount job on
+ // a category that probably doesn't have many members.
+ Category::newFromTitle( $title )->getID();
+ }
}
/**
$cat = Category::newFromName( $catName );
Hooks::run( 'CategoryAfterPageRemoved', [ $cat, $this, $id ] );
}
+
+ // Refresh counts on categories that should be empty now, to
+ // trigger possible deletion. Check master for the most
+ // up-to-date cat_pages.
+ if ( count( $deleted ) ) {
+ $rows = $dbw->select(
+ 'category',
+ [ 'cat_id', 'cat_title', 'cat_pages', 'cat_subcats', 'cat_files' ],
+ [ 'cat_title' => $deleted, 'cat_pages <= 0' ],
+ $method
+ );
+ foreach ( $rows as $row ) {
+ $cat = Category::newFromRow( $row );
+ $cat->refreshCounts();
+ }
+ }
}
);
}
--- /dev/null
+<?php
+/**
+ * Clean up empty categories in the category table.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script to clean up empty categories in the category table.
+ *
+ * @ingroup Maintenance
+ * @since 1.28
+ */
+class CleanupEmptyCategories extends LoggedUpdateMaintenance {
+
+	/**
+	 * Register the script description and its --mode, --begin and --throttle options.
+	 */
+	public function __construct() {
+		parent::__construct();
+		$this->addDescription(
+			<<<TEXT
+This script will clean up the category table by removing entries for empty
+categories without a description page and adding entries for empty categories
+with a description page. It will print out progress indicators every batch. The
+script is perfectly safe to run on large, live wikis, and running it multiple
+times is harmless. You may want to use the throttling options if it's causing
+too much load; they will not affect correctness.
+
+If the script is stopped and later resumed, you can use the --mode and --begin
+options with the last printed progress indicator to pick up where you left off.
+
+When the script has finished, it will make a note of this in the database, and
+will not run again without the --force option.
+TEXT
+		);
+
+		$this->addOption(
+			'mode',
+			'"add" empty categories with description pages, "remove" empty categories '
+			. 'without description pages, or "both"',
+			false,
+			true
+		);
+		$this->addOption(
+			'begin',
+			'Only do categories whose names are alphabetically after the provided name',
+			false,
+			true
+		);
+		$this->addOption(
+			'throttle',
+			'Wait this many milliseconds after each batch. Default: 0',
+			false,
+			true
+		);
+	}
+
+	/**
+	 * @return string Key identifying this update
+	 */
+	protected function getUpdateKey() {
+		return 'cleanup empty categories';
+	}
+
+	/**
+	 * Run the "add" and/or "remove" passes in batches of $this->mBatchSize.
+	 *
+	 * Each pass pages through its candidate rows in title order, recounts every
+	 * category found, prints a resume hint, waits for replication and then
+	 * sleeps for the configured throttle.
+	 *
+	 * @return bool False if --mode is not one of the accepted values, true
+	 *   once every requested pass has run to completion
+	 */
+	protected function doDBUpdates() {
+		$mode = $this->getOption( 'mode', 'both' );
+		$begin = $this->getOption( 'begin', '' );
+		// Clamp to zero: a negative delay is meaningless and is rejected by usleep()
+		$throttle = max( 0, intval( $this->getOption( 'throttle', 0 ) ) );
+
+		if ( !in_array( $mode, [ 'add', 'remove', 'both' ], true ) ) {
+			$this->output( "--mode must be 'add', 'remove', or 'both'.\n" );
+			return false;
+		}
+
+		$dbw = $this->getDB( DB_MASTER );
+
+		if ( $mode === 'add' || $mode === 'both' ) {
+			if ( $begin !== '' ) {
+				$where = [ 'page_title > ' . $dbw->addQuotes( $begin ) ];
+			} else {
+				$where = [];
+			}
+
+			$this->output( "Adding empty categories with description pages...\n" );
+			while ( true ) {
+				# Find category pages that have no row in the category table
+				$rows = $dbw->select(
+					[ 'page', 'category' ],
+					'page_title',
+					array_merge( $where, [
+						'page_namespace' => NS_CATEGORY,
+						'cat_title' => null,
+					] ),
+					__METHOD__,
+					[
+						'ORDER BY' => 'page_title',
+						'LIMIT' => $this->mBatchSize,
+					],
+					[
+						'category' => [ 'LEFT JOIN', 'page_title = cat_title' ],
+					]
+				);
+				if ( !$rows || $rows->numRows() <= 0 ) {
+					# Done, hopefully.
+					break;
+				}
+
+				foreach ( $rows as $row ) {
+					$name = $row->page_title;
+					# The next batch starts after the last title handled
+					$where = [ 'page_title > ' . $dbw->addQuotes( $name ) ];
+					$this->refreshCategory( $name );
+				}
+				$this->output( "--mode=$mode --begin=$name\n" );
+
+				wfWaitForSlaves();
+				usleep( $throttle * 1000 );
+			}
+
+			# --begin only applies to the first pass that runs
+			$begin = '';
+		}
+
+		if ( $mode === 'remove' || $mode === 'both' ) {
+			if ( $begin !== '' ) {
+				$where = [ 'cat_title > ' . $dbw->addQuotes( $begin ) ];
+			} else {
+				$where = [];
+			}
+
+			$this->output( "Removing empty categories without description pages...\n" );
+			while ( true ) {
+				# Find category rows with no pages and no description page
+				$rows = $dbw->select(
+					[ 'category', 'page' ],
+					'cat_title',
+					array_merge( $where, [
+						'page_title' => null,
+						'cat_pages' => 0,
+					] ),
+					__METHOD__,
+					[
+						'ORDER BY' => 'cat_title',
+						'LIMIT' => $this->mBatchSize,
+					],
+					[
+						'page' => [ 'LEFT JOIN', [
+							'page_namespace' => NS_CATEGORY, 'page_title = cat_title'
+						] ],
+					]
+				);
+				if ( !$rows || $rows->numRows() <= 0 ) {
+					# Done, hopefully.
+					break;
+				}
+				foreach ( $rows as $row ) {
+					$name = $row->cat_title;
+					# The next batch starts after the last title handled
+					$where = [ 'cat_title > ' . $dbw->addQuotes( $name ) ];
+					$this->refreshCategory( $name );
+				}
+
+				$this->output( "--mode=remove --begin=$name\n" );
+
+				wfWaitForSlaves();
+				usleep( $throttle * 1000 );
+			}
+		}
+
+		$this->output( "Category cleanup complete.\n" );
+
+		return true;
+	}
+
+	/**
+	 * Recount one category through Category::refreshCounts(), or report the
+	 * name as invalid when no Category object can be built from it.
+	 *
+	 * @param string $name Category title as read from page_title / cat_title
+	 */
+	private function refreshCategory( $name ) {
+		$cat = Category::newFromName( $name );
+		if ( !is_object( $cat ) ) {
+			$this->output( "The category named $name is not valid?!\n" );
+		} else {
+			$cat->refreshCounts();
+		}
+	}
+}
+
+$maintClass = 'CleanupEmptyCategories';
+require_once RUN_MAINTENANCE_IF_MAIN;
CREATE INDEX /*i*/cl_collation_ext ON /*_*/categorylinks (cl_collation, cl_to, cl_type, cl_from);
--
--- Track all existing categories. Something is a category if 1) it has an en-
--- try somewhere in categorylinks, or 2) it once did. Categories might not
--- have corresponding pages, so they need to be tracked separately.
+-- Track all existing categories. Something is a category if 1) it has an entry
+-- somewhere in categorylinks, or 2) it has a description page. Categories
+-- might not have corresponding pages, so they need to be tracked separately.
--
CREATE TABLE /*_*/category (
-- Primary key
CREATE INDEX /*i*/cl_collation_ext ON /*_*/categorylinks (cl_collation, cl_to, cl_type, cl_from);
--
--- Track all existing categories. Something is a category if 1) it has an en-
--- try somewhere in categorylinks, or 2) it once did. Categories might not
--- have corresponding pages, so they need to be tracked separately.
+-- Track all existing categories. Something is a category if 1) it has an entry
+-- somewhere in categorylinks, or 2) it has a description page. Categories
+-- might not have corresponding pages, so they need to be tracked separately.
--
CREATE TABLE /*_*/category (
-- Primary key
$this->tearDownFiles();
$this->doTestStreamFile( $path, $content, $alreadyExists );
$this->tearDownFiles();
+
+ $this->backend = $this->multiBackend;
+ $this->tearDownFiles();
+ $this->doTestStreamFile( $path, $content, $alreadyExists );
+ $this->tearDownFiles();
}
private function doTestStreamFile( $path, $content ) {
$backendName = $this->backendClass();
- // Test doStreamFile() directly to avoid header madness
- $class = new ReflectionClass( $this->backend );
- $method = $class->getMethod( 'doStreamFile' );
- $method->setAccessible( true );
-
if ( $content !== null ) {
$this->prepare( [ 'dir' => dirname( $path ) ] );
$status = $this->create( [ 'dst' => $path, 'content' => $content ] );
"Creation of file at $path succeeded ($backendName)." );
ob_start();
- $method->invokeArgs( $this->backend, [ [ 'src' => $path ] ] );
+ $this->backend->streamFile( [ 'src' => $path, 'headless' => 1, 'allowOB' => 1 ] );
$data = ob_get_contents();
ob_end_clean();
$this->assertEquals( $content, $data, "Correct content streamed from '$path'" );
} else { // 404 case
ob_start();
- $method->invokeArgs( $this->backend, [ [ 'src' => $path ] ] );
+ $this->backend->streamFile( [ 'src' => $path, 'headless' => 1, 'allowOB' => 1 ] );
$data = ob_get_contents();
ob_end_clean();
- $this->assertEquals( '', $data, "Correct content streamed from '$path' ($backendName)" );
+ $this->assertRegExp( '#<h1>File not found</h1>#', $data,
+ "Correct content streamed from '$path' ($backendName)" );
}
}
return $cases;
}
+	/**
+	 * Run the ranged-streaming checks against the single backend, then the
+	 * multi backend, clearing test files before and after each run.
+	 */
+	public function testStreamFileRange() {
+		foreach ( [ $this->singleBackend, $this->multiBackend ] as $backendUnderTest ) {
+			$this->backend = $backendUnderTest;
+			$this->tearDownFiles();
+			$this->doTestStreamFileRange();
+			$this->tearDownFiles();
+		}
+	}
+
+	/**
+	 * Create a 16-byte file in the current backend and check that each Range
+	 * header value streams exactly the expected slice of it.
+	 */
+	private function doTestStreamFileRange() {
+		$backendName = $this->backendClass();
+
+		$base = self::baseStorePath();
+		$path = "$base/unittest-cont1/e/b/z/range_file.txt";
+		$content = "0123456789ABCDEF";
+
+		$this->prepare( [ 'dir' => dirname( $path ) ] );
+		$this->assertGoodStatus(
+			$this->create( [ 'dst' => $path, 'content' => $content ] ),
+			"Creation of file at $path succeeded ($backendName)."
+		);
+
+		// Range header value => bytes expected in the streamed output
+		$expectedChunks = [
+			'bytes=0-0' => '0',
+			'bytes=0-3' => '0123',
+			'bytes=4-8' => '45678',
+			'bytes=15-15' => 'F',
+			'bytes=14-15' => 'EF',
+			'bytes=-5' => 'BCDEF',
+			'bytes=-1' => 'F',
+			'bytes=10-16' => 'ABCDEF',
+			'bytes=10-99' => 'ABCDEF',
+		];
+
+		foreach ( $expectedChunks as $range => $chunk ) {
+			$params = [
+				'src' => $path,
+				'headless' => 1,
+				'allowOB' => 1,
+				'options' => [ 'range' => $range ],
+			];
+
+			// Capture what the backend prints instead of sending it out
+			ob_start();
+			$this->backend->streamFile( $params );
+			$streamed = ob_get_clean();
+
+			$this->assertEquals( $chunk, $streamed, "Correct chunk streamed from '$path' for '$range'" );
+		}
+	}
+
/**
* @dataProvider provider_testGetFileContents
* @covers FileBackend::getFileContents