Fix ForeignAPIRepo::fileExistsBatch()
authorBrad Jorsch <bjorsch@wikimedia.org>
Wed, 26 Dec 2012 16:55:49 +0000 (11:55 -0500)
committerBrad Jorsch <bjorsch@wikimedia.org>
Wed, 9 Oct 2013 17:23:17 +0000 (13:23 -0400)
ForeignAPIRepo::fileExistsBatch() is extremely broken:
* If passed an array with numeric keys (e.g. as called from
  FileRepo::fileExists()), it will cache the fact that the item at index
  0 from the first call was found or not found, no matter what the
  actual file being queried is!
* But it doesn't even use its cache correctly. Rather than checking the
  value cached, it just assumes anything that was cached exists.
* It checks whether the page in the remote repository has a description
  page, not whether a file actually exists for the title.
* But it doesn't do that right either, so in the vast majority of cases
  it will decide everything exists.

All of this is offset by the fact that I can't find any code path that
would actually call this function.

Change-Id: I421e8892b642f5267b8dd755183ebd711db83152

includes/filerepo/ForeignAPIRepo.php

index b1a3962..5eec9a5 100644 (file)
@@ -110,8 +110,8 @@ class ForeignAPIRepo extends FileRepo {
        function fileExistsBatch( array $files ) {
                $results = array();
                foreach ( $files as $k => $f ) {
-                       if ( isset( $this->mFileExists[$k] ) ) {
-                               $results[$k] = true;
+                       if ( isset( $this->mFileExists[$f] ) ) {
+                               $results[$k] = $this->mFileExists[$f];
                                unset( $files[$k] );
                        } elseif ( self::isVirtualUrl( $f ) ) {
                                # @todo FIXME: We need to be able to handle virtual
@@ -129,10 +129,26 @@ class ForeignAPIRepo extends FileRepo {
                $data = $this->fetchImageQuery( array( 'titles' => implode( $files, '|' ),
                                                                                        'prop' => 'imageinfo' ) );
                if ( isset( $data['query']['pages'] ) ) {
-                       $i = 0;
+                       # First, get results from the query. Note we only care whether the image exists,
+                       # not whether it has a description page.
+                       foreach ( $data['query']['pages'] as $p ) {
+                               $this->mFileExists[$p['title']] = ( $p['imagerepository'] !== '' );
+                       }
+                       # Second, copy the results to any redirects that were queried
+                       if ( isset( $data['query']['redirects'] ) ) {
+                               foreach ( $data['query']['redirects'] as $r ) {
+                                       $this->mFileExists[$r['from']] = $this->mFileExists[$r['to']];
+                               }
+                       }
+                       # Third, copy the results to any non-normalized titles that were queried
+                       if ( isset( $data['query']['normalized'] ) ) {
+                               foreach ( $data['query']['normalized'] as $n ) {
+                                       $this->mFileExists[$n['from']] = $this->mFileExists[$n['to']];
+                               }
+                       }
+                       # Finally, copy the results to the output
                        foreach ( $files as $key => $file ) {
-                               $results[$key] = $this->mFileExists[$key] = !isset( $data['query']['pages'][$i]['missing'] );
-                               $i++;
+                               $results[$key] = $this->mFileExists[$file];
                        }
                }
                return $results;