* Completely rewrote this to use OO
[lhc/web/wiklou.git] / maintenance / generateSitemap.php
1 <?php
2 /**
3 * Creates a Google sitemap for the site
4 *
5 * @package MediaWiki
6 * @subpackage Maintenance
7 *
8 * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason
9 * @copyright Copyright © 2005, Jens Frank <jeluf@gmx.de>
10 * @copyright Copyright © 2005, Brion Vibber <brion@pobox.com>
11 *
12 * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html
13 * @link http://www.google.com/schemas/sitemap/0.84/sitemap.xsd
14 *
15 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
16 */
17
// Declared before commandLine.inc is loaded; presumably that include reads it
// to learn which options take a value and then fills $options -- TODO confirm.
$optionsWithArgs = array( 'host' );
require_once 'commandLine.inc';

// --host is mandatory: print the usage line and stop with a non-zero status.
if ( !isset( $options['host'] ) ) {
	echo "Usage: php generateSitemap.php --host=hostname\n";
	exit( 1 );
}

// Expose the requested host name through $_SERVER for the rest of the run.
$_SERVER['HOSTNAME'] = $options['host'];

// Build the generator for that host and write the sitemap files.
$gs = new GenerateSitemap( $options['host'] );
$gs->main();
31
/**
 * Generates gzipped sitemap files for every namespace that contains pages,
 * together with a sitemap index file that lists each generated file.
 *
 * Files are opened with relative paths:
 *  - sitemap-index-<$wgDBname>.xml
 *  - sitemap-<$wgDBname>-NS<namespace>-<n>.xml.gz
 */
class GenerateSitemap {
	/**
	 * Host name passed to the constructor (stored; not read elsewhere in this class)
	 */
	var $host;

	/**
	 * Number of URL entries written to one sitemap file before a new file is started
	 */
	var $cutoff = 9000;

	/**
	 * Sitemap priority per namespace. The -2 and -1 keys are not namespaces:
	 * they hold the fallback priorities used by guessPriority().
	 */
	var $priorities = array(
		// Custom main namespaces
		-2 => '0.5',
		// Custom talk namespaces
		-1 => '0.1',
		NS_MAIN => '1.0',
		NS_TALK => '0.1',
		NS_USER => '0.5',
		NS_USER_TALK => '0.1',
		NS_PROJECT => '0.5',
		NS_PROJECT_TALK => '0.5',
		NS_IMAGE => '0.5',
		NS_IMAGE_TALK => '0.1',
		NS_MEDIAWIKI => '0.0',
		NS_MEDIAWIKI_TALK => '0.0',
		NS_TEMPLATE => '0.0',
		NS_TEMPLATE_TALK => '0.0',
		NS_HELP => '0.5',
		NS_HELP_TALK => '0.1',
		NS_CATEGORY => '0.5',
		NS_CATEGORY_TALK => '0.1',
	);

	/**
	 * Namespace ids found in the page table, filled by generateNamespaces()
	 */
	var $namespaces = array();

	/**
	 * Database handle obtained from wfGetDB( DB_SLAVE )
	 */
	var $dbr;

	/**
	 * $file: handle of the sitemap file currently being written (false when none);
	 * $findex: handle of the sitemap index file
	 */
	var $file, $findex;

	/**
	 * Handle on php://stderr used by debug()
	 */
	var $stderr;

	/**
	 * Constructor: opens stderr, fetches the database handle, collects the
	 * namespaces that have pages and opens the sitemap index file for writing.
	 *
	 * @param string $host Host name to remember in $this->host
	 */
	function GenerateSitemap( $host ) {
		global $wgDBname;

		$this->stderr = fopen( 'php://stderr', 'wt' );

		$this->host = $host;
		$this->dbr =& wfGetDB( DB_SLAVE );
		$this->generateNamespaces();
		$this->findex = fopen( "sitemap-index-$wgDBname.xml", 'wb' );
	}

	/**
	 * Fills $this->namespaces with each distinct page_namespace value of the
	 * page table, in ascending order.
	 */
	function generateNamespaces() {
		$fname = 'GenerateSitemap::generateNamespaces';

		$res = $this->dbr->select( 'page',
			array( 'page_namespace' ),
			array(),
			$fname,
			array(
				'GROUP BY' => 'page_namespace',
				'ORDER BY' => 'page_namespace',
			)
		);

		while ( $row = $this->dbr->fetchObject( $res ) ) {
			$this->namespaces[] = $row->page_namespace;
		}
	}

	/**
	 * Returns the sitemap priority for a namespace: the configured value when
	 * $this->priorities has one, otherwise the guessPriority() fallback.
	 *
	 * @param int $namespace Namespace id
	 * @return string Priority such as '0.5'
	 */
	function priority( $namespace ) {
		if ( isset( $this->priorities[$namespace] ) ) {
			return $this->priorities[$namespace];
		}
		return $this->guessPriority( $namespace );
	}

	/**
	 * Fallback priority for namespaces missing from $this->priorities:
	 * the -1 entry for talk namespaces, the -2 entry for everything else.
	 *
	 * NOTE(review): `Namespace` is a reserved word from PHP 5.3 on, and later
	 * MediaWiki releases appear to expose this helper under another class
	 * name -- confirm against the MediaWiki/PHP versions this script targets.
	 *
	 * @param int $namespace Namespace id
	 * @return string Priority such as '0.1'
	 */
	function guessPriority( $namespace ) {
		return Namespace::isTalk( $namespace ) ? $this->priorities[-1] : $this->priorities[-2];
	}

	/**
	 * Selects namespace, title, redirect flag and touched timestamp of every
	 * page in the given namespace.
	 *
	 * @param int $namespace Namespace id
	 * @return mixed Result handle to be read with $this->dbr->fetchObject()
	 */
	function getPageRes( $namespace ) {
		$fname = 'GenerateSitemap::getPageRes';

		return $this->dbr->select( 'page',
			array(
				'page_namespace',
				'page_title',
				'page_is_redirect',
				'page_touched',
			),
			array( 'page_namespace' => $namespace ),
			$fname
		);
	}

	/**
	 * Writes all sitemap files and the sitemap index.
	 *
	 * For each namespace the pages are streamed into gzipped files of at most
	 * $this->cutoff entries each; every file that gets opened is also listed
	 * in the index and reported on stderr.
	 */
	function main() {
		global $wgDBname;

		fwrite( $this->findex, $this->openIndex() );

		foreach ( $this->namespaces as $namespace ) {
			$res = $this->getPageRes( $namespace );
			$this->file = false;
			$i = $smcount = 0;

			while ( $row = $this->dbr->fetchObject( $res ) ) {
				// Every $cutoff entries (including the very first one) roll over to a new file.
				if ( $i % $this->cutoff == 0 ) {
					if ( $this->file !== false ) {
						gzwrite( $this->file, $this->closeFile() );
						gzclose( $this->file );
					}
					++$smcount;
					$filename = "sitemap-$wgDBname-NS$namespace-$smcount.xml.gz";
					$this->file = gzopen( $filename, 'wb' );
					$this->debug( $namespace );
					gzwrite( $this->file, $this->openFile() );
					fwrite( $this->findex, $this->indexEntry( $filename ) );
					$this->debug( "\t$filename" );
				}
				++$i;
				$title = Title::makeTitle( $row->page_namespace, $row->page_title );
				$date = $this->ISO8601( $row->page_touched );
				gzwrite( $this->file, $this->fileEntry( $title->getFullURL(), $date, $this->priority( $namespace ) ) );
			}
			// Finish the last (possibly partial) file of this namespace.
			if ( $this->file ) {
				gzwrite( $this->file, $this->closeFile() );
				gzclose( $this->file );
			}
		}
		fwrite( $this->findex, $this->closeIndex() );
		fclose( $this->findex );
	}

	/**
	 * @return string XML declaration line shared by all generated files
	 */
	function xmlHead() {
		return '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
	}

	/**
	 * @return string XML namespace URI used for both index and sitemap files
	 */
	function xmlSchema() {
		return 'http://www.google.com/schemas/sitemap/0.84';
	}

	/**
	 * @return string XML declaration plus the opening <sitemapindex> tag
	 */
	function openIndex() {
		return $this->xmlHead() . '<sitemapindex xmlns="' . $this->xmlSchema() . '">' . "\n";
	}

	/**
	 * Builds one <sitemap> element of the index, pointing at
	 * "$wgServer/$filename".
	 *
	 * @param string $filename Name of a generated sitemap file
	 * @return string XML fragment
	 */
	function indexEntry( $filename ) {
		global $wgServer;

		// Entity-escape the location so characters such as '&' keep the XML well-formed.
		$loc = htmlspecialchars( "$wgServer/$filename" );

		return
			"\t<sitemap>\n" .
			"\t\t<loc>$loc</loc>\n" .
			"\t</sitemap>\n";
	}

	/**
	 * @return string Closing tag of the sitemap index
	 */
	function closeIndex() {
		return "</sitemapindex>\n";
	}

	/**
	 * @return string XML declaration plus the opening <urlset> tag
	 */
	function openFile() {
		return $this->xmlHead() . '<urlset xmlns="' . $this->xmlSchema() . '">' . "\n";
	}

	/**
	 * Builds one <url> element of a sitemap file.
	 *
	 * @param string $url Unescaped page URL; it is entity-escaped here
	 * @param string $date Value for <lastmod>
	 * @param string $priority Value for <priority>
	 * @return string XML fragment
	 */
	function fileEntry( $url, $date, $priority ) {
		// Entity-escape the URL so characters such as '&' keep the XML well-formed.
		$loc = htmlspecialchars( $url );

		return
			"\t<url>\n" .
			"\t\t<loc>$loc</loc>\n" .
			"\t\t<lastmod>$date</lastmod>\n" .
			"\t\t<priority>$priority</priority>\n" .
			"\t</url>\n";
	}

	/**
	 * @return string Closing tag of a sitemap file
	 */
	function closeFile() {
		return "</urlset>\n";
	}

	/**
	 * Converts a timestamp via wfTimestamp( TS_DB, ... ) and keeps only its
	 * first 10 characters (4 + 1 + 2 + 1 + 2; presumably the YYYY-MM-DD date
	 * part -- confirm against the TS_DB format).
	 *
	 * @param string $timestamp Timestamp as stored in page_touched
	 * @return string Truncated timestamp
	 */
	function ISO8601( $timestamp ) {
		return substr( wfTimestamp( TS_DB, $timestamp ), 0, 4 + 1 + 2 + 1 + 2 );
	}

	/**
	 * Writes a line to stderr.
	 *
	 * @param string $str Message, written followed by a newline
	 */
	function debug( $str ) {
		fwrite( $this->stderr, "$str\n" );
	}
}
201
202 ?>