<?php
# Generator for Google sitemaps.
#
# Provenance: maintenance/generateSitemap.php as added by commit
# e0e896089ef213a0fd3f5e773c122ce94e9997fc
# (Jens Frank, Sat, 22 Oct 2005 10:40:49 +0000, "Generator for Google sitemaps").
#
# NOTE(review): the copyright line was truncated in the copy this file was
# recovered from; the names below come from the commit author and the text
# that survived. Confirm the exact wording against upstream.
# Copyright (C) 2005 Jens Frank, Brion Vibber
# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# http://www.gnu.org/copyleft/gpl.html
#
# Usage: php generateSitemap.php servername [options]
#
# Files written to the current working directory:
#   sitemap_index.xml          index listing every generated sitemap file
#   sitemap-NS<ns>-<n>.xml.gz  gzip-compressed sitemaps, one series per
#                              namespace, at most $urlsPerSitemap URLs each

if ( $argc < 2 ) {
    print "Usage: php generateSitemap.php servername [options]\n";
    print " servername is the name of the website, e.g. mywiki.mydomain.org\n";
    # A usage error is a failure; report it through the exit status.
    exit( 1 );
}

# Must be set before commandLine.inc is loaded.
# NOTE(review): presumably the settings pulled in by commandLine.inc derive
# $wgServer from this value - verify.
$_SERVER['HOSTNAME'] = $argv[1];
print $argv[1] . "\n";

/** */
require_once( "commandLine.inc" );
print "DB name: $wgDBname\n";
print "DB user: $wgDBuser\n";
# print "DB password: $wgDBpassword\n";

# Sitemap priority written for every page of a namespace. Only the
# namespaces listed here are exported at all.
$priorities = array (
    NS_MAIN => 0.9,
    NS_TALK => 0.4,
    NS_USER => 0.3,
    NS_USER_TALK => 0.3,
    NS_PROJECT => 0.5,
    NS_PROJECT_TALK => 0.2,
    NS_IMAGE => 0.2,
    NS_IMAGE_TALK => 0.1,
    NS_MEDIAWIKI => 0.1,
    NS_MEDIAWIKI_TALK => 0.1,
    NS_TEMPLATE => 0.1,
    NS_TEMPLATE_TALK => 0.1,
    NS_HELP => 0.3,
    NS_HELP_TALK => 0.1,
    NS_CATEGORY => 0.3,
    NS_CATEGORY_TALK => 0.1,
);

# Number of URLs after which a new sitemap file is started.
$urlsPerSitemap = 9000;

# Shared XML fragments for the index and for every sitemap file.
$xmlDeclaration = '<?xml version="1.0" encoding="UTF-8"?>';
$sitemapXmlns = 'http://www.google.com/schemas/sitemap/0.84';
$indexFileName = "sitemap_index.xml";

$dbr =& wfGetDB( DB_SLAVE );
$page = $dbr->tableName( 'page' );
$rev = $dbr->tableName( 'revision' );

$findex = fopen( $indexFileName, "wb" );
if ( $findex === false ) {
    print "Cannot open $indexFileName for writing\n";
    exit( 1 );
}
fwrite( $findex, $xmlDeclaration . "\n" .
    '<sitemapindex xmlns="' . $sitemapXmlns . '">' . "\n" );

foreach ( $priorities as $ns => $priority ) {
    # $ns is one of the integer keys of $priorities, so it is safe to
    # interpolate. page_is_redirect is selected but not used below.
    $sql = "SELECT page_namespace,page_title,page_is_redirect,rev_timestamp FROM $page, $rev ".
        "WHERE page_namespace = $ns AND page_latest = rev_id ";
    print "DB query : $sql\nprocessing ...";
    $res = $dbr->query( $sql );
    print " done\n";

    $gzfile = false;
    $rowcount = 0;
    $sitemapcount = 0;
    while ( $row = $dbr->fetchObject( $res ) ) {
        # Roll over to a new sitemap file on the first row and then after
        # every $urlsPerSitemap rows.
        if ( $rowcount % $urlsPerSitemap == 0 ) {
            if ( $gzfile !== false ) {
                gzwrite( $gzfile, "</urlset>\n" );
                gzclose( $gzfile );
            }
            $sitemapcount++;
            $fname = "sitemap-NS" . $ns . "-" . $sitemapcount . ".xml.gz";
            $gzfile = gzopen( $fname, "wb" );
            if ( $gzfile === false ) {
                print "Cannot open $fname for writing\n";
                exit( 1 );
            }
            gzwrite( $gzfile, $xmlDeclaration . "\n" .
                '<urlset xmlns="' . $sitemapXmlns . '">' . "\n" );
            # Register the new file in the index. The URL is entity-escaped
            # because it is embedded in XML text.
            fwrite( $findex, '<sitemap><loc>' .
                htmlspecialchars( $wgServer . '/' . $fname ) .
                "</loc></sitemap>\n" );
            print "$fname\n";
        }
        $rowcount++;

        $nt = Title::makeTitle( $row->page_namespace, $row->page_title );

        # Build YYYY-MM-DD from the first eight characters of the timestamp.
        # NOTE(review): assumes rev_timestamp starts with YYYYMMDD - confirm.
        $ts = $row->rev_timestamp;
        $date = substr( $ts, 0, 4 ) . '-' . substr( $ts, 4, 2 ) . '-' . substr( $ts, 6, 2 );

        gzwrite( $gzfile, "<url>\n <loc>" . htmlspecialchars( $nt->getFullURL() ) .
            "</loc>\n <lastmod>" . $date . "</lastmod>\n " .
            '<priority>' . $priority . '</priority>' .
            "\n</url>\n" );
    }

    # Close the last (possibly partial) sitemap of this namespace. Nothing
    # was opened when the namespace had no rows.
    if ( $gzfile !== false ) {
        gzwrite( $gzfile, "</urlset>\n" );
        gzclose( $gzfile );
    }
    print "\n";
}

fwrite( $findex, "</sitemapindex>\n" );
fclose( $findex );

?>