From 2f82b79fd7467d1a1e65d8e3bab8600b723a6510 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 27 Jun 2006 21:48:43 +0000 Subject: [PATCH] * Allow fetching all revisions from transwiki Special:Import * Allow fetching all revisions from Special:Export GET request * Disable output buffering on Special:Export; should help with streaming large numbers of history items. * Allow setting a maximum number of revisions for history Special:Export; pages with more than $wgExportMaxHistory revisions are excluded from export when history is requested. * Fix transwiki import of pages with space in name --- RELEASE-NOTES | 8 ++++++ includes/DefaultSettings.php | 8 ++++++ includes/Revision.php | 17 +++++++++++++ includes/SpecialExport.php | 43 +++++++++++++++++++++++++------- includes/SpecialImport.php | 48 +++++++++++++++++++++++++++--------- languages/Messages.php | 5 ++++ 6 files changed, 108 insertions(+), 21 deletions(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index e73ee37e5c..9379699a8d 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -576,6 +576,14 @@ Some default configuration options have changed: * (bug 2483) Run link updates on change via XML import * (bug 2481) List imported pages during Special:Import * (bug 2482) Log and RC entries for Special:Import events +* Allow fetching all revisions from transwiki Special:Import +* Allow fetching all revisions from Special:Export GET request +* Disable output buffering on Special:Export; should help with streaming + large numbers of history items. +* Allow setting a maximum number of revisions for history Special:Export; + pages with more than $wgExportMaxHistory revisions are excluded from + export when history is requested. 
+* Fix transwiki import of pages with space in name == Compatibility == diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index eb2b07bd6e..c4b2296202 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -1470,6 +1470,14 @@ $wgImportSources = array(); * disabled on Wikimedia's sites. */ $wgExportAllowHistory = true; + +/** + * If set nonzero, Special:Export requests for history of pages with + * more revisions than this will be rejected. On some big sites things + * could get bogged down by very very long pages. + */ +$wgExportMaxHistory = 0; + $wgExportAllowListContributors = false ; diff --git a/includes/Revision.php b/includes/Revision.php index 3139f241bd..653bacb8d9 100644 --- a/includes/Revision.php +++ b/includes/Revision.php @@ -768,6 +768,23 @@ class Revision { } return $timestamp; } + + static function countByPageId( $db, $id ) { + $row = $db->selectRow( 'revision', 'COUNT(*) AS revCount', + array( 'rev_page' => $id ), __METHOD__ ); + if( $row ) { + return $row->revCount; + } + return 0; + } + + static function countByTitle( $db, $title ) { + $id = $title->getArticleId(); + if( $id ) { + return Revision::countByPageId( $db, $id ); + } + return 0; + } } /** diff --git a/includes/SpecialExport.php b/includes/SpecialExport.php index 1f9a7f3a0e..e589b57b1b 100644 --- a/includes/SpecialExport.php +++ b/includes/SpecialExport.php @@ -30,17 +30,18 @@ require_once( 'Export.php' ); */ function wfSpecialExport( $page = '' ) { global $wgOut, $wgRequest, $wgExportAllowListContributors; - global $wgExportAllowHistory; + global $wgExportAllowHistory, $wgExportMaxHistory; + $curonly = true; if( $wgRequest->getVal( 'action' ) == 'submit') { $page = $wgRequest->getText( 'pages' ); - if( $wgExportAllowHistory ) { - $curonly = $wgRequest->getCheck( 'curonly' ); - } else { - $curonly = true; - } - } else { - # Pre-check the 'current version only' box in the UI + $curonly = $wgRequest->getCheck( 'curonly' ); + } + if( 
$wgRequest->getCheck( 'history' ) ) { + $curonly = false; + } + if( !$wgExportAllowHistory ) { + // Override $curonly = true; } @@ -49,6 +50,15 @@ function wfSpecialExport( $page = '' ) { if( $page != '' ) { $wgOut->disable(); + + // Cancel output buffering and gzipping if set + // This should provide safer streaming for pages with history + while( $status = ob_get_status() ) { + ob_end_clean(); + if( $status['name'] == 'ob_gzhandler' ) { + header( 'Content-Encoding:' ); + } + } header( "Content-type: application/xml; charset=utf-8" ); $pages = explode( "\n", $page ); @@ -57,7 +67,22 @@ function wfSpecialExport( $page = '' ) { $exporter = new WikiExporter( $db, $history ); $exporter->list_authors = $list_authors ; $exporter->openStream(); - $exporter->pagesByName( $pages ); + + foreach( $pages as $page ) { + if( $wgExportMaxHistory && !$curonly ) { + $title = Title::newFromText( $page ); + if( $title ) { + $count = Revision::countByTitle( $db, $title ); + if( $count > $wgExportMaxHistory ) { + wfDebug( __FUNCTION__ . + ": Skipped $page, $count revisions too big\n" ); + continue; + } + } + } + $exporter->pagesByName( array( $page ) ); // one title per pass, so titles skipped above stay out of the dump + } + $exporter->closeStream(); return; } diff --git a/includes/SpecialImport.php b/includes/SpecialImport.php index 87fbd7ebb4..65ae089763 100644 --- a/includes/SpecialImport.php +++ b/includes/SpecialImport.php @@ -48,10 +48,12 @@ function wfSpecialImport( $page = '' ) { } break; case "interwiki": - $interwiki = $wgRequest->getVal( "interwiki" ); + $interwiki = $wgRequest->getVal( 'interwiki' ); + $history = $wgRequest->getCheck( 'interwikiHistory' ); $source = ImportStreamSource::newFromInterwiki( $interwiki, - $wgRequest->getText( "frompage" ) ); + $wgRequest->getText( "frompage" ), + $history ); break; default: $source = new WikiError( "Unknown import source type" ); @@ -105,19 +107,39 @@ function wfSpecialImport( $page = '' ) { $wgOut->addHTML( "
" . wfMsgHtml('importinterwiki') . " -
+ " . + $wgOut->parse( wfMsg( 'import-interwiki-text' ) ) . " - " ); foreach( $wgImportSources as $interwiki ) { $iw = htmlspecialchars( $interwiki ); $wgOut->addHTML( "\n" ); } $wgOut->addHTML( " - - - + + + " . + wfInput( 'frompage', 40 ) . + " + + + + " . + wfCheckLabel( wfMsg( 'import-interwiki-history' ), + 'interwikiHistory', 'interwikiHistory', true ) . + " + + + + " . + wfSubmitButton( wfMsg( 'import-interwiki-submit' ) ) . + " + +
" ); @@ -705,6 +727,7 @@ class ImportStreamSource { } function newFromURL( $url ) { + wfDebug( __METHOD__ . ": opening $url\n" ); # fopen-wrappers are normally turned off for security. ini_set( "allow_url_fopen", true ); $ret = ImportStreamSource::newFromFile( $url ); @@ -712,13 +735,14 @@ class ImportStreamSource { return $ret; } - function newFromInterwiki( $interwiki, $page ) { + function newFromInterwiki( $interwiki, $page, $history=false ) { $base = Title::getInterwikiLink( $interwiki ); - if( empty( $base ) ) { + $link = Title::newFromText( "$interwiki:Special:Export/$page" ); + if( empty( $base ) || empty( $link ) ) { return new WikiError( 'Bad interwiki link' ); } else { - $import = wfUrlencode( "Special:Export/$page" ); - $url = str_replace( "$1", $import, $base ); + $params = $history ? 'history=1' : ''; + $url = $link->getFullUrl( $params ); return ImportStreamSource::newFromURL( $url ); } } diff --git a/languages/Messages.php b/languages/Messages.php index 06c9aba8a7..ce93b4b1ea 100644 --- a/languages/Messages.php +++ b/languages/Messages.php @@ -1457,6 +1457,11 @@ In the latter case you can also use a link, e.g. [[{{ns:Special}}:Export/{{int:m # Special:Import 'import' => 'Import pages', 'importinterwiki' => 'Transwiki import', +'import-interwiki-text' => 'Select a wiki and page title to import. +Revision dates and editors\' names will be preserved. +All transwiki import actions are logged at the [[Special:Log/import|import log]].', +'import-interwiki-history' => 'Copy all history versions for this page', +'import-interwiki-submit' => 'Import', 'importtext' => 'Please export the file from the source wiki using the Special:Export utility, save it to your disk and upload it here.', 'importstart' => "Importing pages...", 'importnopages' => "No pages to import.", -- 2.20.1