Last $wgDBtype usages in core. Use a $db member variable so we can use it in other...
[lhc/web/wiklou.git] / maintenance / rebuildInterwiki.php
1 <?php
2 /**
3 * Rebuild interwiki table using the file on meta and the language list
4 * Wikimedia specific!
5 *
6 * @file
7 * @todo document
8 * @ingroup Maintenance
9 * @ingroup Wikimedia
10 */
11
/**
 * Describes one family of multi-language wikis: the database-name suffix,
 * the interwiki ("lateral") prefix and the base hostname shared by every
 * language edition of the project.
 *
 * @ingroup Maintenance
 */
class Site {
	/** Database name suffix, e.g. 'wiki' or 'wiktionary' */
	public $suffix;

	/** Interwiki prefix used to link to this project, e.g. 'w' or 'wikt' */
	public $lateral;

	/** Base hostname without the language subdomain, e.g. 'wikipedia.org' */
	public $url;

	/**
	 * @param $s String: database name suffix
	 * @param $l String: lateral (interwiki) prefix
	 * @param $u String: base hostname
	 */
	public function __construct( $s, $l, $u ) {
		$this->url = $u;
		$this->lateral = $l;
		$this->suffix = $s;
	}

	/**
	 * Build the interwiki URL pattern for one language edition of this site.
	 *
	 * Underscores in the language code are turned into hyphens to form the
	 * subdomain; the returned string ends in the literal placeholder "$1".
	 *
	 * @param $lang String: language code, possibly containing underscores
	 * @return String
	 */
	public function getURL( $lang ) {
		$subdomain = strtr( $lang, '_', '-' );
		return 'http://' . $subdomain . '.' . $this->url . '/wiki/$1';
	}
}
30
31 require_once( dirname( __FILE__ ) . '/Maintenance.php' );
32
/**
 * Maintenance script that writes one SQL file per wiki database; each file
 * truncates and repopulates that wiki's interwiki table.
 *
 * Links are assembled from the interwiki map page fetched from
 * meta.wikimedia.org, the language list file, and several hard-coded tables
 * set up in makeInterwikiSQL(). Wikimedia specific!
 *
 * @ingroup Maintenance
 * @ingroup Wikimedia
 */
class RebuildInterwiki extends Maintenance {
	/** @var array Language codes read from the 'langlist' file */
	public $langlist = array();

	/** @var array Database names read from the 'dblist' file */
	public $dblist = array();

	/** @var array Special-case databases: db name => hostname */
	public $specials = array();

	/** @var array Language aliases: alias prefix => real language code */
	public $languageAliases = array();

	/** @var array Prefix rewrites: db name => array( prefix => replacement ) */
	public $prefixRewrites = array();

	public function __construct() {
		parent::__construct();
		$this->mDescription = "Rebuild the interwiki table using the file on meta and the language list.";
		$this->addOption( 'langlist', 'File with one language code per line', false, true );
		$this->addOption( 'dblist', 'File with one db per line', false, true );
		$this->addOption( 'd', 'Output folder', false, true );
	}

	/**
	 * Entry point: load the language and database lists, then generate the
	 * SQL files into the output folder given by the 'd' option.
	 */
	public function execute() {
		# List of language prefixes likely to be found in multi-language sites
		$this->langlist = $this->readList( $this->getOption( 'langlist', "/home/wikipedia/common/langlist" ) );

		# List of all database names
		$this->dblist = $this->readList( $this->getOption( 'dblist', "/home/wikipedia/common/all.dblist" ) );

		# Special-case databases are hard-coded in makeInterwikiSQL(); loading
		# them from a file is disabled:
		//$this->specials = array_flip( array_map( "trim", file( $this->getOption( 'specialdbs', "/home/wikipedia/common/special.dblist" ) ) ) );

		$this->makeInterwikiSQL( $this->getOption( 'd', '/home/wikipedia/conf/interwiki/sql' ) );
	}

	/**
	 * Read a file containing one entry per line.
	 *
	 * Each line is trimmed and blank lines are dropped, so a stray empty line
	 * cannot turn into an empty language code or database name. If the file
	 * cannot be read the script reports the problem and asks error() to
	 * terminate, instead of passing false on to array_map().
	 *
	 * @param $path String: file to read
	 * @return Array list of non-empty, trimmed lines
	 */
	private function readList( $path ) {
		$lines = file( $path );
		if ( $lines === false ) {
			$this->error( "Unable to read list file $path", true );
			return array();
		}
		return array_values( array_filter( array_map( 'trim', $lines ), 'strlen' ) );
	}

	/**
	 * Generate "$destDir/<db>.sql" for every database in $this->dblist.
	 *
	 * Also (re)initialises $this->specials, $this->languageAliases and
	 * $this->prefixRewrites, which the link-building helpers read.
	 *
	 * @param $destDir String: folder the SQL files are written to
	 */
	public function makeInterwikiSQL( $destDir ) {
		$this->output( "Making new interwiki SQL files in $destDir\n" );

		# Multi-language sites
		# db suffix => db suffix, iw prefix, hostname
		$sites = array(
			'wiki' => new Site( 'wiki', 'w', 'wikipedia.org' ),
			'wiktionary' => new Site( 'wiktionary', 'wikt', 'wiktionary.org' ),
			'wikiquote' => new Site( 'wikiquote', 'q', 'wikiquote.org' ),
			'wikibooks' => new Site( 'wikibooks', 'b', 'wikibooks.org' ),
			'wikinews' => new Site( 'wikinews', 'n', 'wikinews.org' ),
			'wikisource' => new Site( 'wikisource', 's', 'wikisource.org' ),
			'wikimedia' => new Site( 'wikimedia', 'chapter', 'wikimedia.org' ),
			'wikiversity' => new Site( 'wikiversity', 'v', 'wikiversity.org' ),
		);

		# Special-case hostnames
		$this->specials = array(
			'sourceswiki' => 'sources.wikipedia.org',
			'quotewiki' => 'wikiquote.org',
			'textbookwiki' => 'wikibooks.org',
			'sep11wiki' => 'sep11.wikipedia.org',
			'metawiki' => 'meta.wikimedia.org',
			'commonswiki' => 'commons.wikimedia.org',
			'specieswiki' => 'species.wikimedia.org',
		);

		# Extra interwiki links that can't be in the intermap for some reason
		$extraLinks = array(
			array( 'm', 'http://meta.wikimedia.org/wiki/$1', 1 ),
			array( 'meta', 'http://meta.wikimedia.org/wiki/$1', 1 ),
			array( 'sep11', 'http://sep11.wikipedia.org/wiki/$1', 1 ),
		);

		# Language aliases, usually configured as redirects to the real wiki in apache
		# Interlanguage links are made directly to the real wiki
		# Something horrible happens if you forget to list an alias here, I can't
		# remember what
		$this->languageAliases = array(
			'zh-cn' => 'zh',
			'zh-tw' => 'zh',
			'dk' => 'da',
			'nb' => 'no',
		);

		# Special case prefix rewrites, for the benefit of Swedish which uses s:t
		# as an abbreviation for saint
		$this->prefixRewrites = array(
			'svwiki' => array( 's' => 'src' ),
		);

		# Construct a list of reserved prefixes: intermap entries using one of
		# these would collide with a language, alias or lateral link
		$reserved = array();
		foreach ( $this->langlist as $lang ) {
			$reserved[$lang] = 1;
		}
		foreach ( $this->languageAliases as $alias => $lang ) {
			$reserved[$alias] = 1;
		}
		foreach ( $sites as $site ) {
			$reserved[$site->lateral] = 1;
		}

		$iwArray = $this->fetchIntermap( $sites, $reserved );

		foreach ( $this->dblist as $db ) {
			if ( isset( $this->specials[$db] ) ) {
				$sql = $this->makeSpecialWikiSQL( $db, $sites, $iwArray, $extraLinks );
			} else {
				$sql = $this->makeLanguageWikiSQL( $db, $sites, $iwArray, $extraLinks );
				if ( $sql === false ) {
					$this->error( "Invalid database $db" );
					continue;
				}
			}

			# Report write failures (e.g. missing output folder) but carry on
			# with the remaining databases
			if ( file_put_contents( "$destDir/$db.sql", $sql ) === false ) {
				$this->error( "Unable to write $destDir/$db.sql" );
			}
		}
	}

	/**
	 * Download the interwiki map from meta and parse its table rows.
	 *
	 * Terminates via error() when the page cannot be fetched or has fewer
	 * than two lines. Rows whose prefix is reserved are skipped.
	 *
	 * @param $sites Array of Site objects, used to decide which URLs are local
	 * @param $reserved Array: prefix => 1 for prefixes that must not be taken
	 * @return Array: prefix => array( 'iw_prefix', 'iw_url', 'iw_local' )
	 */
	private function fetchIntermap( $sites, $reserved ) {
		$intermap = Http::get( 'http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw', 30 );
		$lines = array_map( 'trim', explode( "\n", trim( $intermap ) ) );

		if ( !$lines || count( $lines ) < 2 ) {
			$this->error( "m:Interwiki_map not found", true );
		}

		# A URL counts as local when it mentions the hostname of any of the
		# multi-language sites. The pattern is derived from $sites so that
		# every project in that table (including wikinews and wikiversity)
		# is covered.
		# NOTE(review): like the previous hard-coded pattern this is not
		# anchored to a host boundary - confirm whether it should be.
		$localHosts = array();
		foreach ( $sites as $site ) {
			$localHosts[] = preg_quote( $site->url, '/' );
		}
		$localRegex = '/(' . implode( '|', $localHosts ) . ')/';

		$iwArray = array();
		foreach ( $lines as $line ) {
			$matches = array();
			# Table rows look like: | prefix || http(s)://url
			if ( !preg_match( '/^\|\s*(.*?)\s*\|\|\s*(https?:\/\/.*?)\s*$/', $line, $matches ) ) {
				continue;
			}
			$prefix = strtolower( $matches[1] );
			$url = $matches[2];
			$local = preg_match( $localRegex, $url ) ? 1 : 0;

			if ( empty( $reserved[$prefix] ) ) {
				$iwArray[$prefix] = array( "iw_prefix" => $prefix, "iw_url" => $url, "iw_local" => $local );
			}
		}
		return $iwArray;
	}

	/**
	 * Build the SQL for a database listed in $this->specials.
	 *
	 * Such a wiki gets the intermap links, a lateral link to the 'en' edition
	 * of every multi-language site, interlanguage links to wikipedia, and the
	 * extra links.
	 *
	 * @param $db String: database name, must be a key of $this->specials
	 * @param $sites Array of Site objects keyed by db suffix
	 * @param $iwArray Array of intermap entries
	 * @param $extraLinks Array of array( prefix, url, local )
	 * @return String
	 */
	private function makeSpecialWikiSQL( $db, $sites, $iwArray, $extraLinks ) {
		$host = $this->specials[$db];
		$sql = "-- Generated by rebuildInterwiki.php";
		$sql .= "\n--$host\n\n";
		$sql .= "USE $db;\n" .
			"TRUNCATE TABLE interwiki;\n" .
			"INSERT INTO interwiki (iw_prefix, iw_url, iw_local) VALUES \n";
		$first = true;

		# Intermap links
		foreach ( $iwArray as $iwEntry ) {
			$sql .= $this->makeLink( $iwEntry, $first, $db );
		}

		# Links to multilanguage sites
		foreach ( $sites as $targetSite ) {
			$sql .= $this->makeLink( array( $targetSite->lateral, $targetSite->getURL( 'en' ), 1 ), $first, $db );
		}

		# Interlanguage links to wikipedia
		$sql .= $this->makeLanguageLinks( $sites['wiki'], $first, $db );

		# Extra links
		foreach ( $extraLinks as $link ) {
			$sql .= $this->makeLink( $link, $first, $db );
		}

		return $sql . ";\n";
	}

	/**
	 * Build the SQL for a database named "<lang><site suffix>".
	 *
	 * @param $db String: database name
	 * @param $sites Array of Site objects keyed by db suffix
	 * @param $iwArray Array of intermap entries
	 * @param $extraLinks Array of array( prefix, url, local )
	 * @return String|false SQL text, or false when $db matches no site suffix
	 */
	private function makeLanguageWikiSQL( $db, $sites, $iwArray, $extraLinks ) {
		# Find out which site this DB belongs to: first suffix that matches wins
		$site = false;
		$matches = array();
		foreach ( $sites as $candidateSite ) {
			$pattern = '/(.*)' . preg_quote( $candidateSite->suffix, '/' ) . '$/';
			if ( preg_match( $pattern, $db, $matches ) ) {
				$site = $candidateSite;
				break;
			}
		}
		if ( !$site ) {
			return false;
		}

		$suffix = $site->suffix;
		$lang = $matches[1];
		$host = "$lang." . $site->url;

		$sql = "-- Generated by rebuildInterwiki.php";
		$sql .= "\n--$host\n\n";
		$sql .= "USE $db;\n" .
			"TRUNCATE TABLE interwiki;\n" .
			"INSERT INTO interwiki (iw_prefix,iw_url,iw_local) VALUES\n";
		$first = true;

		# Intermap links
		# Suppress links with the same name as the site; for wikipedias that
		# name is 'wikipedia' rather than the db suffix 'wiki'
		$selfPrefix = ( $suffix === 'wiki' ) ? 'wikipedia' : $suffix;
		foreach ( $iwArray as $iwEntry ) {
			if ( $iwEntry['iw_prefix'] !== $selfPrefix ) {
				$sql .= $this->makeLink( $iwEntry, $first, $db );
			}
		}

		# Lateral links
		foreach ( $sites as $targetSite ) {
			# Suppress link to self
			if ( $targetSite->suffix != $site->suffix ) {
				$sql .= $this->makeLink( array( $targetSite->lateral, $targetSite->getURL( $lang ), 1 ), $first, $db );
			}
		}

		# Interlanguage links
		$sql .= $this->makeLanguageLinks( $site, $first, $db );

		# w link within wikipedias
		# Other sites already have it as a lateral link
		if ( $site->suffix == "wiki" ) {
			$sql .= $this->makeLink( array( "w", "http://en.wikipedia.org/wiki/$1", 1 ), $first, $db );
		}

		# Extra links
		foreach ( $extraLinks as $link ) {
			$sql .= $this->makeLink( $link, $first, $db );
		}

		return $sql . ";\n";
	}

	# ------------------------------------------------------------------------------------------

	/**
	 * Returns part of an INSERT statement, corresponding to all
	 * interlanguage links to a particular site.
	 *
	 * @param $site Site the links point to
	 * @param $first Boolean: true until the first row has been emitted;
	 *   updated by reference
	 * @param $source String: database name the SQL is generated for
	 * @return String
	 */
	function makeLanguageLinks( &$site, &$first, $source ) {
		$sql = "";

		# Actual languages with their own databases
		foreach ( $this->langlist as $targetLang ) {
			$sql .= $this->makeLink( array( $targetLang, $site->getURL( $targetLang ), 1 ), $first, $source );
		}

		# Language aliases
		foreach ( $this->languageAliases as $alias => $lang ) {
			$sql .= $this->makeLink( array( $alias, $site->getURL( $lang ), 1 ), $first, $source );
		}
		return $sql;
	}

	/**
	 * Make SQL for a single link from an array.
	 *
	 * @param $entry Array of row values (prefix, url, local); when index 0 is
	 *   set it is subject to the prefix rewrites configured for $source
	 * @param $first Boolean: true until the first row has been emitted;
	 *   updated by reference so later rows get a leading comma
	 * @param $source String: database name the SQL is generated for
	 * @return String
	 */
	function makeLink( $entry, &$first, $source ) {
		if ( isset( $this->prefixRewrites[$source] ) && isset( $entry[0] ) && isset( $this->prefixRewrites[$source][$entry[0]] ) ) {
			$entry[0] = $this->prefixRewrites[$source][$entry[0]];
		}

		$sql = "";
		# Add comma
		if ( $first ) {
			$first = false;
		} else {
			$sql .= ",\n";
		}
		$dbr = wfGetDB( DB_SLAVE );
		$sql .= "(" . $dbr->makeList( $entry ) . ")";
		return $sql;
	}
}
274
# Name the class to run, then load the file named by DO_MAINTENANCE
$maintClass = 'RebuildInterwiki';
require_once DO_MAINTENANCE;
277