Revert r39949 "* Revert revert r39662 of my parser changes."
[lhc/web/wiklou.git] / includes / parser / LinkHolderArray.php
1 <?php
2
/**
 * Collects link placeholders ("holders") emitted while parsing and later
 * swaps them for real link HTML, so that the existence checks for all
 * internal links can be bundled into a small number of database queries.
 */
class LinkHolderArray {
	/** Number of stored holders above which isBig() reports true. */
	public $batchSize = 1000;

	/**
	 * Stored holder entries. $internals is indexed by namespace and then by
	 * link ID; $interwikis is indexed by link ID only. Each entry is an array
	 * with the keys 'title', 'text', 'pdbk' and optionally 'query'.
	 */
	public $internals = array(), $interwikis = array();

	/** Running count of stored holders, internal and interwiki together. */
	public $size = 0;

	/** Owner object; this class calls nextLinkID(), getOptions() and getOutput() on it. */
	public $parent;

	/**
	 * @param object $parent Owner that hands out link IDs and provides options/output
	 */
	public function __construct( $parent ) {
		$this->parent = $parent;
	}

	/**
	 * Merge another LinkHolderArray into this one.
	 *
	 * Entries already present under the same key are kept (array union).
	 * Both internal and interwiki entries are added to the size counter,
	 * matching the way makeHolder() counts them.
	 *
	 * @param LinkHolderArray $other
	 */
	public function merge( $other ) {
		foreach ( $other->internals as $ns => $entries ) {
			$this->size += count( $entries );
			if ( !isset( $this->internals[$ns] ) ) {
				$this->internals[$ns] = $entries;
			} else {
				$this->internals[$ns] += $entries;
			}
		}
		$this->size += count( $other->interwikis );
		$this->interwikis += $other->interwikis;
	}

	/**
	 * Returns true if the memory requirements of this object are getting large
	 *
	 * @return bool
	 */
	public function isBig() {
		return $this->size > $this->batchSize;
	}

	/**
	 * Clear all stored link holders.
	 * Make sure you don't have any text left using these link holders, before you call this
	 */
	public function clear() {
		$this->internals = array();
		$this->interwikis = array();
		$this->size = 0;
	}

	/**
	 * Make a link placeholder. The text returned can be later resolved to a real link with
	 * replace(). This is done for two reasons: firstly to avoid further
	 * parsing of interwiki links, and secondly to allow all existence checks and
	 * article length checks (for stub links) to be bundled into a single query.
	 *
	 * @param Title $nt Link target; anything that is not an object yields an error marker
	 * @param string $text Link text
	 * @param string $query Optional query string, stored with the entry when non-empty
	 * @param string $trail Text following the link; its leading part may be pulled into the link
	 * @param string $prefix Text to prepend to the link text
	 * @return string Placeholder comment followed by the remaining trail
	 */
	public function makeHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
		wfProfileIn( __METHOD__ );
		if ( !is_object( $nt ) ) {
			# Fail gracefully
			$retVal = "<!-- ERROR -->{$prefix}{$text}{$trail}";
		} else {
			# Separate the link trail from the rest of the link
			list( $inside, $trail ) = Linker::splitTrail( $trail );

			$entry = array(
				'title' => $nt,
				'text' => $prefix.$text.$inside,
				'pdbk' => $nt->getPrefixedDBkey(),
			);
			if ( $query !== '' ) {
				$entry['query'] = $query;
			}

			// Use a globally unique ID to keep the objects mergable
			$key = $this->parent->nextLinkID();
			if ( $nt->isExternal() ) {
				$this->interwikis[$key] = $entry;
				$retVal = "<!--IWLINK $key-->{$trail}";
			} else {
				$ns = $nt->getNamespace();
				$this->internals[$ns][$key] = $entry;
				$retVal = "<!--LINK $ns:$key-->{$trail}";
			}
			$this->size++;
		}
		wfProfileOut( __METHOD__ );
		return $retVal;
	}

	/**
	 * Replace <!--LINK--> and <!--IWLINK--> placeholders with actual links, in the buffer.
	 * The placeholders are the ones created by makeHolder().
	 *
	 * @param string &$text Buffer to modify in place
	 * @return array Link CSS classes, indexed by PDBK
	 */
	public function replace( &$text ) {
		wfProfileIn( __METHOD__ );

		$colours = $this->replaceInternal( $text );
		$this->replaceInterwiki( $text );

		wfProfileOut( __METHOD__ );
		return $colours;
	}

	/**
	 * Replace internal links
	 *
	 * Works in four stages: classify every stored link (link cache first,
	 * then one batched query on the page table), retry unresolved links and
	 * categories under their language variants, build the placeholder => HTML
	 * table, and finally substitute into $text.
	 *
	 * @param string &$text Buffer to modify in place
	 * @return array Link CSS classes, indexed by PDBK ('' = known, 'new' = missing,
	 *   otherwise whatever the skin's getLinkColour() returned); empty when
	 *   there are no internal links
	 */
	protected function replaceInternal( &$text ) {
		if ( !$this->internals ) {
			return array();
		}

		wfProfileIn( __METHOD__ );
		global $wgUser, $wgContLang;

		$colours = array();
		$linkcolour_ids = array();
		$sk = $this->parent->getOptions()->getSkin();
		$linkCache = LinkCache::singleton();
		$output = $this->parent->getOutput();

		wfProfileIn( __METHOD__.'-check' );
		$dbr = wfGetDB( DB_SLAVE );
		$page = $dbr->tableName( 'page' );
		$threshold = $wgUser->getOption('stubthreshold');

		# Sort by namespace
		# The query builder below relies on this: it opens a new
		# "(page_namespace=N AND page_title IN(" group whenever $ns changes.
		ksort( $this->internals );

		# Generate query
		$query = false;
		$current = null;
		foreach ( $this->internals as $ns => $entries ) {
			foreach ( $entries as $index => $entry ) {
				$title = $entry['title'];
				$pdbk = $entry['pdbk'];

				# Skip invalid entries.
				# Result will be ugly, but prevents crash.
				if ( is_null( $title ) ) {
					continue;
				}

				# Check if it's a static known link, e.g. interwiki
				if ( $title->isAlwaysKnown() ) {
					$colours[$pdbk] = '';
				} elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) {
					$colours[$pdbk] = '';
					$output->addLink( $title, $id );
				} elseif ( $linkCache->isBadLink( $pdbk ) ) {
					$colours[$pdbk] = 'new';
				} elseif ( $title->getNamespace() == NS_SPECIAL && !SpecialPage::exists( $pdbk ) ) {
					$colours[$pdbk] = 'new';
				} else {
					# Not in the link cache, add it to the query
					if ( !isset( $current ) ) {
						$current = $ns;
						$query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len";
						$query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN(";
					} elseif ( $current != $ns ) {
						$current = $ns;
						$query .= ")) OR (page_namespace=$ns AND page_title IN(";
					} else {
						$query .= ', ';
					}

					$query .= $dbr->addQuotes( $title->getDBkey() );
				}
			}
		}
		if ( $query ) {
			$query .= '))';

			$res = $dbr->query( $query, __METHOD__ );

			# Fetch data and form into an associative array
			# non-existent = broken
			while ( $s = $dbr->fetchObject($res) ) {
				$title = Title::makeTitle( $s->page_namespace, $s->page_title );
				$pdbk = $title->getPrefixedDBkey();
				$linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect );
				$output->addLink( $title, $s->page_id );
				$colours[$pdbk] = $sk->getLinkColour( $title, $threshold );
				//add id to the extension todolist
				$linkcolour_ids[$s->page_id] = $pdbk;
			}
			unset( $res );
			//pass an array of page_ids to an extension
			wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );
		}
		wfProfileOut( __METHOD__.'-check' );

		# Do a second query for different language variants of links and categories
		if ( $wgContLang->hasVariants() ) {
			$linkBatch = new LinkBatch();
			$variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders)
			$categoryMap = array(); // maps $category_variant => $category (dbkeys)
			$varCategories = array(); // category replacements oldDBkey => newDBkey

			$categories = $output->getCategoryLinks();

			// Add variants of links to link batch
			foreach ( $this->internals as $ns => $entries ) {
				foreach ( $entries as $index => $entry ) {
					$pdbk = $entry['pdbk'];
					$title = $entry['title'];

					// Same guard as the first pass: an invalid entry has no text to convert.
					// Links already classified by the first pass need no variant lookup.
					if ( is_null( $title ) || isset( $colours[$pdbk] ) ) {
						continue;
					}

					$key = "$ns:$index";
					$titleText = $title->getText();

					// generate all variants of the link title text
					$allTextVariants = $wgContLang->convertLinkToAllVariants($titleText);

					// link was not found (in first query), add all variants to query
					foreach ( $allTextVariants as $textVariant ) {
						if ( $textVariant != $titleText ) {
							$variantTitle = Title::makeTitle( $ns, $textVariant );
							if ( is_null( $variantTitle ) ) {
								continue;
							}
							$linkBatch->addObj( $variantTitle );
							$variantMap[$variantTitle->getPrefixedDBkey()][] = $key;
						}
					}
				}
			}

			// process categories, check if a category exists in some variant
			foreach ( $categories as $category ) {
				$variants = $wgContLang->convertLinkToAllVariants($category);
				foreach ( $variants as $variant ) {
					if ( $variant != $category ) {
						$variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) );
						if ( is_null( $variantTitle ) ) {
							continue;
						}
						$linkBatch->addObj( $variantTitle );
						$categoryMap[$variant] = $category;
					}
				}
			}

			if ( !$linkBatch->isEmpty() ) {
				// construct query
				$titleClause = $linkBatch->constructSet('page', $dbr);

				$variantQuery = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len";

				$variantQuery .= " FROM $page WHERE $titleClause";

				$varRes = $dbr->query( $variantQuery, __METHOD__ );

				// Set once any holder has been re-pointed at a variant page
				$holdersChanged = false;

				// for each found variants, figure out link holders and replace
				while ( $s = $dbr->fetchObject($varRes) ) {

					$variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title );
					$varPdbk = $variantTitle->getPrefixedDBkey();
					$vardbk = $variantTitle->getDBkey();

					$holderKeys = array();
					if ( isset( $variantMap[$varPdbk] ) ) {
						$holderKeys = $variantMap[$varPdbk];
						$linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect );
						$output->addLink( $variantTitle, $s->page_id );
					}

					// loop over link holders
					foreach ( $holderKeys as $key ) {
						list( $ns, $index ) = explode( ':', $key, 2 );
						// Deliberately no "$entry =& ..." here: a reference left bound to
						// $entry would be written through by the foreach loops further
						// down that reuse $entry as their value variable.
						$pdbk = $this->internals[$ns][$index]['pdbk'];

						if ( !isset( $colours[$pdbk] ) ) {
							// found link in some of the variants, replace the link holder data
							$this->internals[$ns][$index]['title'] = $variantTitle;
							$this->internals[$ns][$index]['pdbk'] = $varPdbk;

							// set pdbk and colour
							$colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold );
							// Register under the variant key, which is the key now used
							// in $colours and in the holder entry.
							$linkcolour_ids[$s->page_id] = $varPdbk;
							$holdersChanged = true;
						}
					}

					// check if the object is a variant of a category
					if ( isset( $categoryMap[$vardbk] ) ) {
						$oldkey = $categoryMap[$vardbk];
						if ( $oldkey != $vardbk ) {
							$varCategories[$oldkey] = $vardbk;
						}
					}
				}

				// Let extensions adjust the colours once, after all rows are processed
				if ( $holdersChanged ) {
					wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );
				}

				// rebuild the categories in original order (if there are replacements)
				if ( count( $varCategories ) > 0 ) {
					$newCats = array();
					$originalCats = $output->getCategories();
					foreach ( $originalCats as $cat => $sortkey ) {
						// make the replacement
						if ( array_key_exists( $cat, $varCategories ) ) {
							$newCats[$varCategories[$cat]] = $sortkey;
						} else {
							$newCats[$cat] = $sortkey;
						}
					}
					// NOTE(review): assumes getOutput() returns the same object the
					// parent exposes as mOutput -- confirm against the parent class.
					$output->setCategoryLinks($newCats);
				}
			}
		}

		# Construct search and replace arrays
		wfProfileIn( __METHOD__.'-construct' );
		$replacePairs = array();
		foreach ( $this->internals as $ns => $entries ) {
			foreach ( $entries as $index => $entry ) {
				$pdbk = $entry['pdbk'];
				$title = $entry['title'];
				$query = isset( $entry['query'] ) ? $entry['query'] : '';
				$key = "$ns:$index";
				$searchkey = "<!--LINK $key-->";
				if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] == 'new' ) {
					$linkCache->addBadLinkObj( $title );
					$colours[$pdbk] = 'new';
					$output->addLink( $title, 0 );
					$replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title,
						$entry['text'],
						$query );
				} else {
					$replacePairs[$searchkey] = $sk->makeColouredLinkObj( $title, $colours[$pdbk],
						$entry['text'],
						$query );
				}
			}
		}
		$replacer = new HashtableReplacer( $replacePairs, 1 );
		wfProfileOut( __METHOD__.'-construct' );

		# Do the thing
		wfProfileIn( __METHOD__.'-replace' );
		$text = preg_replace_callback(
			'/(<!--LINK .*?-->)/',
			$replacer->cb(),
			$text);

		wfProfileOut( __METHOD__.'-replace' );
		wfProfileOut( __METHOD__ );
		return $colours;
	}

	/**
	 * Replace interwiki links
	 *
	 * @param string &$text Buffer to modify in place
	 */
	protected function replaceInterwiki( &$text ) {
		if ( empty( $this->interwikis ) ) {
			return;
		}

		wfProfileIn( __METHOD__ );
		# Make interwiki link HTML
		$sk = $this->parent->getOptions()->getSkin();
		$replacePairs = array();
		foreach ( $this->interwikis as $key => $link ) {
			$replacePairs[$key] = $sk->link( $link['title'], $link['text'] );
		}
		$replacer = new HashtableReplacer( $replacePairs, 1 );

		$text = preg_replace_callback(
			'/<!--IWLINK (.*?)-->/',
			$replacer->cb(),
			$text );
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Replace <!--LINK--> link placeholders with plain text of links
	 * (not HTML-formatted).
	 * @param string $text
	 * @return string
	 */
	public function replaceText( $text ) {
		wfProfileIn( __METHOD__ );

		$text = preg_replace_callback(
			'/<!--(LINK|IWLINK) (.*?)-->/',
			array( $this, 'replaceTextCallback' ),
			$text );

		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * Callback for replaceText(): map one matched placeholder to its stored
	 * link text. Unknown or malformed placeholders are returned unchanged.
	 *
	 * @param array $matches [0] whole placeholder, [1] 'LINK' or 'IWLINK', [2] holder key
	 * @return string
	 * @private
	 */
	public function replaceTextCallback( $matches ) {
		$type = $matches[1];
		$key = $matches[2];
		if ( $type == 'LINK' ) {
			// Internal keys have the form "namespace:id"; anything else cannot match
			$parts = explode( ':', $key, 2 );
			if ( count( $parts ) == 2
				&& isset( $this->internals[$parts[0]][$parts[1]]['text'] )
			) {
				return $this->internals[$parts[0]][$parts[1]]['text'];
			}
		} elseif ( $type == 'IWLINK' ) {
			if ( isset( $this->interwikis[$key]['text'] ) ) {
				return $this->interwikis[$key]['text'];
			}
		}
		return $matches[0];
	}
}