From b73e0294895a33b8467cebf9b4f7427c7cc80ab4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 3 Nov 2005 09:31:13 +0000 Subject: [PATCH] * Optimization * Use the average length of page titles as a limit rather than the insane theoretical limit * 10MB = 2^20*10 not 2^20*1 --- maintenance/generateSitemap.php | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/maintenance/generateSitemap.php b/maintenance/generateSitemap.php index b5c0a7d4da..57bccba4fd 100644 --- a/maintenance/generateSitemap.php +++ b/maintenance/generateSitemap.php @@ -217,11 +217,11 @@ class GenerateSitemap { fwrite( $this->findex, $this->openIndex() ); - $this->generateLimit( NS_MAIN ); foreach ( $this->namespaces as $namespace ) { $res = $this->getPageRes( $namespace ); $this->file = false; $i = $smcount = 0; + $this->generateLimit( $namespace ); $this->debug( $namespace ); while ( $row = $this->dbr->fetchObject( $res ) ) { @@ -230,7 +230,6 @@ class GenerateSitemap { $this->write( $this->file, $this->closeFile() ); $this->close( $this->file ); } - $this->generateLimit( $namespace ); $filename = $this->sitemapFilename( $namespace, $smcount++ ); $this->file = $this->open( $this->fspath . $filename, 'wb' ); $this->write( $this->file, $this->openFile() ); @@ -412,18 +411,30 @@ class GenerateSitemap { * byte character in the title (63*4+1*3 = 255) */ function generateLimit( $namespace ) { - $title = Title::makeTitle( $namespace, str_repeat( "\xf0\xa8\xae\x81", 63 ) . "\xe5\x96\x83" ); + //$title = Title::makeTitle( $namespace, str_repeat( "\xf0\xa8\xae\x81", 63 ) .
"\xe5\x96\x83" ); + $count = $this->getAveragePageLength( $namespace ); + $title = Title::makeTitle( $namespace, str_repeat( 'a', $count ) ); $olen = strlen( $this->openFile() ); $elen = strlen( $this->fileEntry( $title->getFullUrl(), wfTimestamp( TS_ISO_8601, wfTimestamp() ), '1.0' ) ); $clen = strlen( $this->closeFile() ); - for ( $i = 1, $etot = $elen; ( $olen + $clen + $etot + $elen ) <= pow( 2, 20 ); ++$i ) + for ( $i = 1, $etot = $elen; $olen + $clen + $etot + $elen <= pow( 2, 20 ) * 10; ++$i ) $etot += $elen; $this->limit = $i; } + function getAveragePageLength( $namespace ) { + $fname = 'GenerateSitemap::getAveragePageLength'; + + return $this->dbr->selectField( 'page', + 'CEIL(AVG(LENGTH(page_title)))', + array( 'page_namespace' => $namespace ), + $fname + ); + } + /** * Update $this->timestamp to the current time */ -- 2.20.1