3 * Creates a Google sitemap for the site
6 * @subpackage Maintenance
8 * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason
9 * @copyright Copyright © 2005, Jens Frank <jeluf@gmx.de>
10 * @copyright Copyright © 2005, Brion Vibber <brion@pobox.com>
12 * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html
13 * @link http://www.google.com/schemas/sitemap/0.84/sitemap.xsd
15 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
// Command-line entry point of the script.
// $optionsWithArgs lists 'host'; it is set before commandLine.inc is loaded.
// NOTE(review): presumably commandLine.inc reads it to know that --host takes
// a value, and fills in $options -- neither is visible here, confirm.
18 $optionsWithArgs = array( 'host' );
20 require_once 'commandLine.inc';
// Without a --host option, print the usage line.
// (The remainder of this `if` block is not visible in this excerpt.)
22 if ( ! isset( $options['host'] ) ) {
23 echo "Usage: php generateSitemap.php --host=hostname\n";
// Copy the requested host into $_SERVER['HOSTNAME'].
// NOTE(review): presumably consumed by later URL generation -- confirm.
26 $_SERVER['HOSTNAME'] = $options['host'];
// Create the generator for that host.
29 $gs = new GenerateSitemap( $options['host'] );
// Writes gzip-compressed sitemap files (one series per namespace) plus a
// sitemap index file that references them.
32 class GenerateSitemap
{
// Map of namespace constant => value emitted in the <priority> element.
// priority() looks a namespace up here and falls back to guessPriority(),
// which reads keys -1 and -2 of this array; those entries, like several
// others, are not visible in this excerpt.
35 var $priorities = array(
36 // Custom main namespaces
38 // Custom talk namespaces
43 NS_USER_TALK
=> '0.1',
45 NS_PROJECT_TALK
=> '0.5',
47 NS_IMAGE_TALK
=> '0.1',
48 NS_MEDIAWIKI
=> '0.0',
49 NS_MEDIAWIKI_TALK
=> '0.0',
51 NS_TEMPLATE_TALK
=> '0.0',
53 NS_HELP_TALK
=> '0.1',
55 NS_CATEGORY_TALK
=> '0.1',
// Namespace ids to process; filled by generateNamespaces().
57 var $namespaces = array();
/**
 * Constructor, written in the old style (method named after the class).
 * NOTE(review): PHP 8 no longer treats a same-named method as a
 * constructor -- confirm the PHP version this script targets.
 *
 * @param $host host name supplied by the caller; it is not used in the
 *              lines visible in this excerpt
 */
62 function GenerateSitemap( $host ) {
// Stream that debug() writes progress messages to.
65 $this->stderr
= fopen( 'php://stderr', 'wt' );
// Database handle used by generateNamespaces() and getPageRes().
// NOTE(review): DB_SLAVE presumably selects a read-only replica -- confirm.
68 $this->dbr
=& wfGetDB( DB_SLAVE
);
// Fill $this->namespaces from the database.
69 $this->generateNamespaces();
// Open the sitemap index file (relative path, named after $wgDBname).
// NOTE(review): $wgDBname is not declared in the visible lines; presumably a
// `global` statement sat on a line missing from this excerpt.
70 $this->findex
= fopen( "sitemap-index-$wgDBname.xml", 'wb' );
/**
 * Fill $this->namespaces with the page_namespace values found in the
 * `page` table.
 */
73 function generateNamespaces() {
// Caller name string.
// NOTE(review): presumably passed to select() on a line not visible here.
74 $fname = 'GenerateSitemap::generateNamespaces';
// Select page_namespace, grouped and ordered by page_namespace.
// (Parts of this argument list are not visible in this excerpt.)
76 $res = $this->dbr
->select( 'page',
77 array( 'page_namespace' ),
81 'GROUP BY' => 'page_namespace',
82 'ORDER BY' => 'page_namespace',
// Append the namespace of every returned row.
86 while ( $row = $this->dbr
->fetchObject( $res ) )
87 $this->namespaces
[] = $row->page_namespace
;
/**
 * Return the sitemap priority for a namespace.
 *
 * @param $namespace key looked up in $this->priorities
 * @return the configured entry when one is set, otherwise the result of
 *         guessPriority()
 */
90 function priority( $namespace ) {
91 return isset( $this->priorities
[$namespace] ) ?
$this->priorities
[$namespace] : $this->guessPriority( $namespace );
/**
 * Fallback priority for a namespace without its own entry: returns
 * $this->priorities[-1] when Namespace::isTalk() is true for it and
 * $this->priorities[-2] otherwise.
 *
 * NOTE(review): `namespace` is a reserved keyword from PHP 5.3 on, so a
 * class called Namespace cannot be referenced like this there -- confirm
 * the PHP version this script targets.
 *
 * @param $namespace namespace id to classify
 */
94 function guessPriority( $namespace ) {
95 return Namespace::isTalk( $namespace ) ?
$this->priorities
[-1] : $this->priorities
[-2];
/**
 * Query the `page` table for the rows of one namespace.
 *
 * @param $namespace value matched against page_namespace
 * @return the result of the select() call; the selected columns and the
 *         remaining arguments are not visible in this excerpt
 */
98 function getPageRes( $namespace ) {
// Caller name string.
// NOTE(review): presumably passed to select() on a line not visible here.
99 $fname = 'GenerateSitemap::getPageRes';
101 return $this->dbr
->select( 'page',
108 array( 'page_namespace' => $namespace ),
// Body of the method that writes all sitemap files and the index; its
// header line is not visible in this excerpt. $i, $smcount and $wgDBname are
// used below but their declaration/initialisation and updates are also on
// lines missing from this excerpt.
// Start the index file with its opening markup.
116 fwrite( $this->findex
, $this->openIndex() );
// Process every namespace collected by generateNamespaces().
118 foreach ( $this->namespaces
as $namespace ) {
119 $res = $this->getPageRes( $namespace );
123 while ( $row = $this->dbr
->fetchObject( $res ) ) {
// Whenever $i is a multiple of $this->cutoff, switch to a new sitemap file.
124 if ( $i %
$this->cutoff
== 0 ) {
// Finish and close the sitemap file that is currently open, if any.
125 if ( $this->file
!== false ) {
126 gzwrite( $this->file
, $this->closeFile() );
127 gzclose( $this->file
);
// Open the next gzip file, write its opening markup, and reference it from
// the index. Progress goes to stderr via debug().
130 $filename = "sitemap-$wgDBname-NS$namespace-$smcount.xml.gz";
131 $this->file
= gzopen( $filename, 'wb' );
132 $this->debug( $namespace );
133 gzwrite( $this->file
, $this->openFile() );
134 fwrite( $this->findex
, $this->indexEntry( $filename ) );
135 $this->debug( "\t$filename" );
// Emit one entry for the current row: full URL of the title, date derived
// from page_touched, and the priority of the namespace being processed.
138 $title = Title
::makeTitle( $row->page_namespace
, $row->page_title
);
139 $date = $this->ISO8601( $row->page_touched
);
140 gzwrite( $this->file
, $this->fileEntry( $title->getFullURL(), $date, $this->priority( $namespace ) ) );
// Finish and close the sitemap file that is open at this point.
// (The loop nesting around these lines is not visible in this excerpt.)
143 gzwrite( $this->file
, $this->closeFile() );
144 gzclose( $this->file
);
// Finish and close the index file.
147 fwrite( $this->findex
, $this->closeIndex() );
148 fclose( $this->findex
);
// Returns the XML declaration (version 1.0, UTF-8) followed by a newline.
// NOTE(review): the method header is not visible in this excerpt; presumably
// this is the body of xmlHead(), which openIndex() and openFile() call.
152 return '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
/**
 * Return the namespace URI that openIndex() and openFile() put into the
 * xmlns attribute of their root elements.
 */
155 function xmlSchema() {
156 return 'http://www.google.com/schemas/sitemap/0.84';
/**
 * Return the opening markup of the index file: the result of xmlHead()
 * followed by the <sitemapindex> start tag carrying the xmlSchema() URI.
 */
159 function openIndex() {
160 return $this->xmlHead() . '<sitemapindex xmlns="' . $this->xmlSchema() . '">' . "\n";
/**
 * Build the index-file markup that references one sitemap file.
 * Only the <loc> line of the returned string is visible in this excerpt;
 * the other lines of the method are left as they are.
 *
 * @param $filename name of the sitemap file; it is appended to $wgServer
 *                  to form the URL placed in <loc>
 */
163 function indexEntry( $filename ) {
// Fixed: the element was closed with </log>, which does not match the
// opening <loc> tag and made the generated index malformed XML.
168 "\t\t<loc>$wgServer/$filename</loc>\n" .
/**
 * Return the closing tag of the index file, matching the <sitemapindex>
 * element opened by openIndex().
 */
172 function closeIndex() {
173 return "</sitemapindex>\n";
/**
 * Return the opening markup of a sitemap file: the result of xmlHead()
 * followed by the <urlset> start tag carrying the xmlSchema() URI.
 */
176 function openFile() {
177 return $this->xmlHead() . '<urlset xmlns="' . $this->xmlSchema() . '">' . "\n";
/**
 * Build the markup for one page in a sitemap file. The visible lines emit
 * <loc>, <lastmod> and <priority>; the surrounding lines of the returned
 * string are not visible in this excerpt.
 *
 * NOTE(review): the values are interpolated as-is; presumably the caller
 * supplies a URL that is already safe inside XML -- confirm, since
 * characters such as & must be entity-escaped.
 *
 * @param $url      value placed in <loc>
 * @param $date     value placed in <lastmod>
 * @param $priority value placed in <priority>
 */
180 function fileEntry( $url, $date, $priority ) {
183 "\t\t<loc>$url</loc>\n" .
184 "\t\t<lastmod>$date</lastmod>\n" .
185 "\t\t<priority>$priority</priority>\n" .
/**
 * Return the closing tag of a sitemap file, matching the <urlset> element
 * opened by openFile().
 */
189 function closeFile() {
190 return "</urlset>\n";
/**
 * Return the first 10 characters (4 + 1 + 2 + 1 + 2) of the timestamp
 * after conversion with wfTimestamp( TS_DB, ... ).
 * NOTE(review): presumably that prefix is the YYYY-MM-DD date part --
 * confirm against the TS_DB format.
 *
 * @param $timestamp timestamp handed to wfTimestamp()
 */
193 function ISO8601( $timestamp ) {
194 return substr( wfTimestamp( TS_DB
, $timestamp ), 0, 4 +
1 +
2 +
1 +
2 );
/**
 * Write a message followed by a newline to the stderr handle opened in the
 * constructor.
 *
 * @param $str text to write
 */
197 function debug( $str ) {
198 fwrite( $this->stderr
, "$str\n" );