Merging Vector's navigation_urls and SkinTemplate's content_actions code into content...
[lhc/web/wiklou.git] / maintenance / rebuildInterwiki.php
1 <?php
2 /**
3 * Rebuild interwiki table using the file on meta and the language list
4 * Wikimedia specific!
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @todo document
23 * @ingroup Maintenance
24 * @ingroup Wikimedia
25 */
26
/**
 * Describes one family of multi-language wikis (for example all Wikipedias).
 *
 * Holds the database-name suffix shared by the family, the "lateral"
 * interwiki prefix used to link to it from sister projects, and the base
 * domain under which each language edition is hosted.
 *
 * @ingroup Maintenance
 */
class Site {
	/** Database name suffix, e.g. 'wiki' or 'wiktionary' */
	public $suffix;

	/** Interwiki prefix used for lateral links to this family, e.g. 'w' */
	public $lateral;

	/** Base domain of the family, e.g. 'wikipedia.org' */
	public $url;

	/**
	 * @param $s String: database name suffix
	 * @param $l String: lateral interwiki prefix
	 * @param $u String: base domain
	 */
	public function __construct( $s, $l, $u ) {
		$this->url = $u;
		$this->lateral = $l;
		$this->suffix = $s;
	}

	/**
	 * Build the article URL pattern for one language edition of this family.
	 *
	 * Underscores in the language code are turned into hyphens so the code
	 * can be used as a host name label. The returned string ends in the
	 * literal placeholder "$1".
	 *
	 * @param $lang String: language code, possibly containing underscores
	 * @return String
	 */
	public function getURL( $lang ) {
		$hostLabel = str_replace( '_', '-', $lang );
		return 'http://' . $hostLabel . '.' . $this->url . '/wiki/$1';
	}
}
45
46 require_once( dirname( __FILE__ ) . '/Maintenance.php' );
47
/**
 * Maintenance script that writes one SQL file per database, each of which
 * truncates and repopulates that wiki's interwiki table.
 *
 * The rows come from four sources: the interwiki map page fetched from meta,
 * the hard-coded list of multi-language site families, the language list
 * file, and a short list of hard-coded extra links.
 *
 * @ingroup Maintenance
 */
class RebuildInterwiki extends Maintenance {
	// These properties used to be created dynamically (and were therefore
	// public); they are declared here with the same visibility.

	/** Language codes read from the 'langlist' file */
	public $langlist = array();

	/** Database names read from the 'dblist' file */
	public $dblist = array();

	/** Map of special-case database name => host name */
	public $specials = array();

	/** Map of language alias => real language code */
	public $languageAliases = array();

	/** Map of database name => array( original prefix => replacement prefix ) */
	public $prefixRewrites = array();

	public function __construct() {
		parent::__construct();
		$this->mDescription = "Rebuild the interwiki table using the file on meta and the language list.";
		$this->addOption( 'langlist', 'File with one language code per line', false, true );
		$this->addOption( 'dblist', 'File with one db per line', false, true );
		$this->addOption( 'd', 'Output folder', false, true );
	}

	/**
	 * Entry point: load the language and database lists, then generate the
	 * SQL files in the output folder.
	 */
	public function execute() {
		# List of language prefixes likely to be found in multi-language sites
		$this->langlist = $this->readListFile(
			$this->getOption( 'langlist', "/home/wikipedia/common/langlist" )
		);

		# List of all database names
		$this->dblist = $this->readListFile(
			$this->getOption( 'dblist', "/home/wikipedia/common/all.dblist" )
		);

		$this->makeInterwikiSQL( $this->getOption( 'd', '/home/wikipedia/conf/interwiki/sql' ) );
	}

	/**
	 * Read a file containing one entry per line.
	 *
	 * Each line is trimmed and blank lines are dropped, so a trailing
	 * newline or an empty line cannot produce an empty language code or
	 * database name. Aborts the script if the file cannot be read.
	 *
	 * @param $path String: file to read
	 * @return Array: list of non-empty, trimmed lines
	 */
	private function readListFile( $path ) {
		$lines = is_readable( $path ) ? file( $path ) : false;
		if ( $lines === false ) {
			$this->error( "Unable to read list file $path", true );
		}
		return array_values( array_filter( array_map( 'trim', $lines ), 'strlen' ) );
	}

	/**
	 * Fetch the interwiki map page from meta and parse it.
	 *
	 * Only table rows of the form "| prefix || http(s)://url" are used.
	 * Prefixes are lower-cased; reserved prefixes are skipped. Aborts the
	 * script if the page has fewer than two lines.
	 *
	 * @param $reserved Array: map of reserved prefix => 1
	 * @return Array: map of prefix => array( iw_prefix, iw_url, iw_local )
	 */
	private function fetchInterwikiMap( $reserved ) {
		$intermap = Http::get( 'http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw', 30 );
		$lines = $intermap ? array_map( 'trim', explode( "\n", trim( $intermap ) ) ) : array();

		if ( count( $lines ) < 2 ) {
			$this->error( "m:Interwiki_map not found", true );
		}

		$iwArray = array();
		foreach ( $lines as $line ) {
			$matches = array();
			if ( !preg_match( '/^\|\s*(.*?)\s*\|\|\s*(https?:\/\/.*?)\s*$/', $line, $matches ) ) {
				continue;
			}

			$prefix = strtolower( $matches[1] );
			if ( !empty( $reserved[$prefix] ) ) {
				continue;
			}

			$url = $matches[2];
			# A link is flagged local when its URL mentions one of these domains
			$local = preg_match( '/(wikipedia|wiktionary|wikisource|wikiquote|wikibooks|wikimedia)\.org/', $url ) ? 1 : 0;

			$iwArray[$prefix] = array( "iw_prefix" => $prefix, "iw_url" => $url, "iw_local" => $local );
		}
		return $iwArray;
	}

	/**
	 * Generate "$destDir/<db>.sql" for every database in the database list.
	 *
	 * Databases listed in the special-case table get the intermap links,
	 * English lateral links and interlanguage links to Wikipedia. All other
	 * databases must end in a known site suffix; the part before the suffix
	 * is taken as the language code. Databases matching neither are reported
	 * and skipped.
	 *
	 * @param $destDir String: folder the SQL files are written to
	 */
	public function makeInterwikiSQL( $destDir ) {
		$this->output( "Making new interwiki SQL files in $destDir\n" );

		# Multi-language sites
		# db suffix => db suffix, iw prefix, hostname
		$sites = array(
			'wiki' => new Site( 'wiki', 'w', 'wikipedia.org' ),
			'wiktionary' => new Site( 'wiktionary', 'wikt', 'wiktionary.org' ),
			'wikiquote' => new Site( 'wikiquote', 'q', 'wikiquote.org' ),
			'wikibooks' => new Site( 'wikibooks', 'b', 'wikibooks.org' ),
			'wikinews' => new Site( 'wikinews', 'n', 'wikinews.org' ),
			'wikisource' => new Site( 'wikisource', 's', 'wikisource.org' ),
			'wikimedia' => new Site( 'wikimedia', 'chapter', 'wikimedia.org' ),
			'wikiversity' => new Site( 'wikiversity', 'v', 'wikiversity.org' ),
		);

		# Special-case hostnames
		$this->specials = array(
			'sourceswiki' => 'sources.wikipedia.org',
			'quotewiki' => 'wikiquote.org',
			'textbookwiki' => 'wikibooks.org',
			'sep11wiki' => 'sep11.wikipedia.org',
			'metawiki' => 'meta.wikimedia.org',
			'commonswiki' => 'commons.wikimedia.org',
			'specieswiki' => 'species.wikimedia.org',
		);

		# Extra interwiki links that can't be in the intermap for some reason
		$extraLinks = array(
			array( 'm', 'http://meta.wikimedia.org/wiki/$1', 1 ),
			array( 'meta', 'http://meta.wikimedia.org/wiki/$1', 1 ),
			array( 'sep11', 'http://sep11.wikipedia.org/wiki/$1', 1 ),
		);

		# Language aliases, usually configured as redirects to the real wiki in apache
		# Interlanguage links are made directly to the real wiki
		# Something horrible happens if you forget to list an alias here, I can't
		# remember what
		$this->languageAliases = array(
			'zh-cn' => 'zh',
			'zh-tw' => 'zh',
			'dk' => 'da',
			'nb' => 'no',
		);

		# Special case prefix rewrites, for the benefit of Swedish which uses s:t
		# as an abbreviation for saint
		$this->prefixRewrites = array(
			'svwiki' => array( 's' => 'src' ),
		);

		# Construct a list of reserved prefixes: language codes, language
		# aliases and lateral prefixes may not be overridden by the intermap
		$reserved = array();
		foreach ( $this->langlist as $lang ) {
			$reserved[$lang] = 1;
		}
		foreach ( $this->languageAliases as $alias => $lang ) {
			$reserved[$alias] = 1;
		}
		foreach ( $sites as $site ) {
			$reserved[$site->lateral] = 1;
		}

		# Extract the intermap from meta
		$iwArray = $this->fetchInterwikiMap( $reserved );

		foreach ( $this->dblist as $db ) {
			$sql = "-- Generated by rebuildInterwiki.php";
			if ( isset( $this->specials[$db] ) ) {
				# Special wiki
				# Has interwiki links and interlanguage links to wikipedia

				$host = $this->specials[$db];
				# MySQL needs whitespace after "--" for the line to be a comment
				$sql .= "\n-- $host\n\n";
				$sql .= "USE $db;\n" .
					"TRUNCATE TABLE interwiki;\n" .
					"INSERT INTO interwiki (iw_prefix, iw_url, iw_local) VALUES \n";
				$first = true;

				# Intermap links
				foreach ( $iwArray as $iwEntry ) {
					$sql .= $this->makeLink( $iwEntry, $first, $db );
				}

				# Links to multilanguage sites
				foreach ( $sites as $targetSite ) {
					$sql .= $this->makeLink( array( $targetSite->lateral, $targetSite->getURL( 'en' ), 1 ), $first, $db );
				}

				# Interlanguage links to wikipedia
				$sql .= $this->makeLanguageLinks( $sites['wiki'], $first, $db );

				# Extra links
				foreach ( $extraLinks as $link ) {
					$sql .= $this->makeLink( $link, $first, $db );
				}

				$sql .= ";\n";
			} else {
				# Find out which site this DB belongs to; whatever precedes
				# the site suffix in the database name is the language code
				$site = false;
				$lang = '';
				foreach ( $sites as $candidateSite ) {
					$matches = array();
					$pattern = '/(.*)' . preg_quote( $candidateSite->suffix, '/' ) . '$/';
					if ( preg_match( $pattern, $db, $matches ) ) {
						$site = $candidateSite;
						$lang = $matches[1];
						break;
					}
				}
				if ( !$site ) {
					$this->error( "Invalid database $db" );
					continue;
				}
				$host = "$lang." . $site->url;
				# MySQL needs whitespace after "--" for the line to be a comment
				$sql .= "\n-- $host\n\n";

				$sql .= "USE $db;\n" .
					"TRUNCATE TABLE interwiki;\n" .
					"INSERT INTO interwiki (iw_prefix,iw_url,iw_local) VALUES\n";
				$first = true;

				# Intermap links
				# Suppress links with the same name as the site; for the
				# 'wiki' suffix that name is 'wikipedia'
				$selfPrefix = ( $site->suffix == 'wiki' ) ? 'wikipedia' : $site->suffix;
				foreach ( $iwArray as $iwEntry ) {
					if ( $iwEntry['iw_prefix'] !== $selfPrefix ) {
						$sql .= $this->makeLink( $iwEntry, $first, $db );
					}
				}

				# Lateral links
				foreach ( $sites as $targetSite ) {
					# Suppress link to self
					if ( $targetSite->suffix != $site->suffix ) {
						$sql .= $this->makeLink( array( $targetSite->lateral, $targetSite->getURL( $lang ), 1 ), $first, $db );
					}
				}

				# Interlanguage links
				$sql .= $this->makeLanguageLinks( $site, $first, $db );

				# w link within wikipedias
				# Other sites already have it as a lateral link
				if ( $site->suffix == "wiki" ) {
					$sql .= $this->makeLink( array( "w", 'http://en.wikipedia.org/wiki/$1', 1 ), $first, $db );
				}

				# Extra links
				foreach ( $extraLinks as $link ) {
					$sql .= $this->makeLink( $link, $first, $db );
				}
				$sql .= ";\n";
			}

			# Report a failed write instead of silently losing the file,
			# but keep going with the remaining databases
			if ( file_put_contents( "$destDir/$db.sql", $sql ) === false ) {
				$this->error( "Unable to write $destDir/$db.sql" );
			}
		}
	}

	# ------------------------------------------------------------------------------------------

	/**
	 * Returns part of an INSERT statement, corresponding to all
	 * interlanguage links to a particular site.
	 *
	 * @param $site Site: target site family
	 * @param $first Boolean: true until the first row has been emitted;
	 *   updated by reference
	 * @param $source String: database name the links are generated for
	 * @return String
	 */
	public function makeLanguageLinks( &$site, &$first, $source ) {
		$sql = "";

		# Actual languages with their own databases
		foreach ( $this->langlist as $targetLang ) {
			$sql .= $this->makeLink( array( $targetLang, $site->getURL( $targetLang ), 1 ), $first, $source );
		}

		# Language aliases point at the URL of the real language
		foreach ( $this->languageAliases as $alias => $lang ) {
			$sql .= $this->makeLink( array( $alias, $site->getURL( $lang ), 1 ), $first, $source );
		}
		return $sql;
	}

	/**
	 * Make SQL for a single link from an array.
	 *
	 * The row is preceded by ",\n" unless it is the first row of the
	 * statement. A prefix rewrite configured for the source database is
	 * applied to the value at index 0 when that index exists.
	 *
	 * @param $entry Array: row values (prefix, URL, local flag)
	 * @param $first Boolean: true until the first row has been emitted;
	 *   updated by reference
	 * @param $source String: database name the link is generated for
	 * @return String
	 */
	public function makeLink( $entry, &$first, $source ) {
		if ( isset( $this->prefixRewrites[$source] )
			&& isset( $entry[0] )
			&& isset( $this->prefixRewrites[$source][$entry[0]] ) )
		{
			$entry[0] = $this->prefixRewrites[$source][$entry[0]];
		}

		$sql = "";
		# Add comma
		if ( $first ) {
			$first = false;
		} else {
			$sql .= ",\n";
		}
		$dbr = wfGetDB( DB_SLAVE );
		$sql .= "(" . $dbr->makeList( $entry ) . ")";
		return $sql;
	}
}
289
# Name the class to run, then include the file the DO_MAINTENANCE constant points to
$maintClass = 'RebuildInterwiki';
require_once DO_MAINTENANCE;
292