* Optimization
author Ævar Arnfjörð Bjarmason <avar@users.mediawiki.org>
Thu, 3 Nov 2005 09:31:13 +0000 (09:31 +0000)
committer Ævar Arnfjörð Bjarmason <avar@users.mediawiki.org>
Thu, 3 Nov 2005 09:31:13 +0000 (09:31 +0000)
* Use the average length of page titles as a limit rather than the insane theoretical limit
* 10MB = 2^20*10 not 2^20*1

maintenance/generateSitemap.php

index b5c0a7d..57bccba 100644 (file)
@@ -217,11 +217,11 @@ class GenerateSitemap {
 
                fwrite( $this->findex, $this->openIndex() );
                
-               $this->generateLimit( NS_MAIN );
                foreach ( $this->namespaces as $namespace ) {
                        $res = $this->getPageRes( $namespace );
                        $this->file = false;
                        $i = $smcount = 0;
+                       $this->generateLimit( $namespace );
                        
                        $this->debug( $namespace );
                        while ( $row = $this->dbr->fetchObject( $res ) ) {
@@ -230,7 +230,6 @@ class GenerateSitemap {
                                                $this->write( $this->file, $this->closeFile() );
                                                $this->close( $this->file );
                                        }
-                                       $this->generateLimit( $namespace );
                                        $filename = $this->sitemapFilename( $namespace, $smcount++ );
                                        $this->file = $this->open( $this->fspath . $filename, 'wb' );
                                        $this->write( $this->file, $this->openFile() );
@@ -412,18 +411,30 @@ class GenerateSitemap {
         * byte character in the title (63*4+1*3 = 255)
         */
        function generateLimit( $namespace ) {
-               $title = Title::makeTitle( $namespace, str_repeat( "\xf0\xa8\xae\x81", 63 ) . "\xe5\x96\x83" );
+               //$title = Title::makeTitle( $namespace, str_repeat( "\xf0\xa8\xae\x81", 63 ) . "\xe5\x96\x83" );
+               $count = $this->getAveragePageLength( $namespace );
+               $title = Title::makeTitle( $namespace, str_repeat( 'a', $count ) );
                
                $olen = strlen( $this->openFile() );
                $elen = strlen( $this->fileEntry( $title->getFullUrl(), wfTimestamp( TS_ISO_8601, wfTimestamp() ), '1.0' ) );
                $clen = strlen( $this->closeFile() );
 
-               for ( $i = 1, $etot = $elen; ( $olen + $clen + $etot + $elen ) <= pow( 2, 20 ); ++$i )
+               for ( $i = 1, $etot = $elen; $olen + $clen + $etot + $elen <= pow( 2, 20 ) * 10; ++$i )
                        $etot += $elen;
                
                $this->limit = $i;
        }
 
+       function getAveragePageLength( $namespace ) {
+               $fname = 'GenerateSitemap::getAveragePageLength';
+               
+               return $this->dbr->selectField( 'page',
+                       'CEIL(AVG(LENGTH(page_title)))',
+                       array( 'page_namespace' => $namespace ),
+                       $fname
+               );      
+       }
+
        /**
         * Update $this->timestamp to the current time
         */