Add option to expose original sha1 in thumb url
[lhc/web/wiklou.git] / includes / filerepo / LocalRepo.php
1 <?php
2 /**
3 * Local repository that stores files in the local filesystem and registers them
4 * in the wiki's own database.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup FileRepo
23 */
24
25 /**
26 * A repository that stores files in the local filesystem and registers them
27 * in the wiki's own database. This is the most commonly used repository class.
28 *
29 * @ingroup FileRepo
30 */
31 class LocalRepo extends FileRepo {
/** @var array Callback (class, static method) naming LocalFile::newFromTitle; not called in this class, presumably used by the parent repo */
protected $fileFactory = array( 'LocalFile', 'newFromTitle' );

/** @var array Callback (class, static method) naming LocalFile::newFromKey; not called in this class, presumably used by the parent repo */
protected $fileFactoryKey = array( 'LocalFile', 'newFromKey' );

/** @var array Callback invoked by newFileFromRow() with ( $row, $this ) for rows that have img_name */
protected $fileFromRowFactory = array( 'LocalFile', 'newFromRow' );

/** @var array Callback invoked by newFileFromRow() with ( $row, $this ) for rows that have oi_name */
protected $oldFileFromRowFactory = array( 'OldLocalFile', 'newFromRow' );

/** @var array Callback (class, static method) naming OldLocalFile::newFromTitle; not called in this class, presumably used by the parent repo */
protected $oldFileFactory = array( 'OldLocalFile', 'newFromTitle' );

/** @var array Callback (class, static method) naming OldLocalFile::newFromKey; not called in this class, presumably used by the parent repo */
protected $oldFileFactoryKey = array( 'OldLocalFile', 'newFromKey' );
49
/**
 * Set up the repository from its configuration array.
 *
 * When the configuration asks for the 'sha1' storage layout, the backend set
 * up by the parent constructor is wrapped in a FileBackendDBRepoWrapper.
 *
 * @param array|null $info Repository configuration, passed on to the parent constructor
 */
function __construct( array $info = null ) {
	parent::__construct( $info );

	$usesSha1Layout = false;
	if ( isset( $info['storageLayout'] ) ) {
		$usesSha1Layout = ( $info['storageLayout'] === 'sha1' );
	}
	$this->hasSha1Storage = $usesSha1Layout;

	if ( !$this->hasSha1Storage() ) {
		return;
	}

	$wrapperConfig = array(
		'backend' => $this->backend,
		'repoName' => $this->name,
		'dbHandleFactory' => $this->getDBFactory()
	);
	$this->backend = new FileBackendDBRepoWrapper( $wrapperConfig );
}
63
/**
 * Build a file object from a database row.
 *
 * Rows carrying img_name go to the current-file factory, rows carrying
 * oi_name go to the old-file factory; anything else is rejected.
 *
 * @throws MWException If the row has neither img_name nor oi_name
 * @param stdClass $row
 * @return LocalFile
 */
function newFileFromRow( $row ) {
	if ( isset( $row->img_name ) ) {
		$factory = $this->fileFromRowFactory;
	} elseif ( isset( $row->oi_name ) ) {
		$factory = $this->oldFileFromRowFactory;
	} else {
		throw new MWException( __METHOD__ . ': invalid row' );
	}

	return call_user_func( $factory, $row, $this );
}
78
/**
 * Create an old-version file object for this repo from a title and archive name.
 *
 * @param Title $title
 * @param string $archiveName
 * @return OldLocalFile
 */
function newFromArchiveName( $title, $archiveName ) {
	$oldFile = OldLocalFile::newFromArchiveName( $title, $this, $archiveName );

	return $oldFile;
}
87
/**
 * Remove files from the 'deleted' zone whose storage keys are no longer
 * referenced by the filearchive table or by hidden oldimage rows.
 *
 * This lives in the repo because database locks have to be interleaved with
 * file operations, which may be remote.
 *
 * @param array $storageKeys
 *
 * @return FileRepoStatus
 */
function cleanupDeletedBatch( array $storageKeys ) {
	if ( $this->hasSha1Storage() ) {
		wfDebug( __METHOD__ . ": skipped because storage uses sha1 paths\n" );
		return Status::newGood();
	}

	$backend = $this->backend;
	$root = $this->getZonePath( 'deleted' );
	$dbw = $this->getMasterDB();
	$status = $this->newGood();

	foreach ( array_unique( $storageKeys ) as $key ) {
		$hashPath = $this->getDeletedHashPath( $key );
		$path = "$root/$hashPath$key";

		$dbw->startAtomic( __METHOD__ );
		// Both checks run unconditionally: besides reporting usage they lock
		// the key so nothing can start using it before we are finished.
		$deleted = $this->deletedFileHasKey( $key, 'lock' );
		$hidden = $this->hiddenFileHasKey( $key, 'lock' );
		$stillReferenced = ( $deleted || $hidden );

		if ( $stillReferenced ) {
			wfDebug( __METHOD__ . ": $key still in use\n" );
			$status->successCount++;
		} else {
			wfDebug( __METHOD__ . ": deleting $key\n" );
			$deleteStatus = $backend->doOperation(
				array( 'op' => 'delete', 'src' => $path )
			);
			if ( !$deleteStatus->isOK() ) {
				$status->error( 'undelete-cleanup-error', $path );
				$status->failCount++;
			}
		}
		$dbw->endAtomic( __METHOD__ );
	}

	return $status;
}
133
/**
 * Tell whether any filearchive row in the 'deleted' storage group uses this key.
 *
 * @param string $key File storage key (base-36 sha1 key with file extension)
 * @param string|null $lock Use "lock" to lock the row via FOR UPDATE
 * @return bool File with this key is in use
 */
protected function deletedFileHasKey( $key, $lock = null ) {
	$options = array();
	if ( $lock === 'lock' ) {
		$options[] = 'FOR UPDATE';
	}

	$conds = array(
		'fa_storage_group' => 'deleted',
		'fa_storage_key' => $key
	);
	$found = $this->getMasterDB()->selectField(
		'filearchive', '1', $conds, __METHOD__, $options
	);

	return (bool)$found;
}
151
/**
 * Tell whether any oldimage row with the file-deleted bit set matches this
 * key's hash and extension.
 *
 * @param string $key File storage key (base-36 sha1 key with file extension)
 * @param string|null $lock Use "lock" to lock the row via FOR UPDATE
 * @return bool File with this key is in use
 */
protected function hiddenFileHasKey( $key, $lock = null ) {
	$options = array();
	if ( $lock === 'lock' ) {
		$options[] = 'FOR UPDATE';
	}

	$sha1 = self::getHashFromKey( $key );
	// The extension is whatever follows the first '.' in the key
	$dotOffset = strcspn( $key, '.' );
	$ext = File::normalizeExtension( substr( $key, $dotOffset + 1 ) );

	$dbw = $this->getMasterDB();

	$conds = array(
		'oi_sha1' => $sha1,
		'oi_archive_name ' . $dbw->buildLike( $dbw->anyString(), ".$ext" ),
	);
	$conds[$dbw->bitAnd( 'oi_deleted', File::DELETED_FILE )] = File::DELETED_FILE;

	$found = $dbw->selectField( 'oldimage', '1', $conds, __METHOD__, $options );

	return (bool)$found;
}
174
/**
 * Gets the SHA1 hash from a storage key
 *
 * The hash is everything before the first '.' in the key, the same split
 * point hiddenFileHasKey() uses to locate the extension. strtok() is
 * deliberately avoided: it overwrites PHP's global tokenizer state (breaking
 * any caller that is in the middle of a strtok() loop), returns false for an
 * empty key, and skips leading dots so that ".png" would yield "png".
 *
 * @param string $key File storage key (hash, optionally followed by ".ext")
 * @return string Part of the key before the first '.'; the whole key if it
 *   contains no '.'; '' for an empty key or a key that starts with '.'
 */
public static function getHashFromKey( $key ) {
	$key = (string)$key;
	$dotPos = strpos( $key, '.' );
	if ( $dotPos === false ) {
		return $key;
	}

	// Cast guards against substr() returning false on old PHP versions
	return (string)substr( $key, 0, $dotPos );
}
184
/**
 * Look up whether the page for $title is a redirect to another file page,
 * consulting the cache first and the redirect table on a miss.
 *
 * @param Title $title Title of file
 * @return bool|Title Target title, or false if there is no file redirect
 */
function checkRedirect( Title $title ) {
	$cache = ObjectCache::getMainWANInstance();

	$title = File::normalizeTitle( $title, 'exception' );
	$keyHash = md5( $title->getDBkey() );

	$memcKey = $this->getSharedCacheKey( 'image_redirect', $keyHash );
	$expiry = 86400; // has invalidation, 1 day
	if ( $memcKey === false ) {
		$memcKey = $this->getLocalCacheKey( 'image_redirect', $keyHash );
		$expiry = 300; // no invalidation, 5 minutes
	}

	$cachedValue = $cache->get( $memcKey );
	if ( $cachedValue === ' ' || $cachedValue === '' ) {
		// Cached negative result: no redirect
		return false;
	}
	$isPurgeMarker = ( $cachedValue === ' PURGED' );
	if ( !$isPurgeMarker && strval( $cachedValue ) !== '' ) {
		return Title::newFromText( $cachedValue, NS_FILE );
	}
	// Anything else (false, null, purge marker) is handled as a cache miss

	$id = $this->getArticleID( $title );
	if ( !$id ) {
		$cache->set( $memcKey, " ", $expiry );

		return false;
	}

	$row = $this->getSlaveDB()->selectRow(
		'redirect',
		array( 'rd_title', 'rd_namespace' ),
		array( 'rd_from' => $id ),
		__METHOD__
	);

	$isFileRedirect = ( $row && $row->rd_namespace == NS_FILE );
	if ( !$isFileRedirect ) {
		$cache->set( $memcKey, '', $expiry );

		return false;
	}

	$targetTitle = Title::makeTitle( $row->rd_namespace, $row->rd_title );
	$cache->set( $memcKey, $targetTitle->getDBkey(), $expiry );

	return $targetTitle;
}
236
/**
 * Look up the page ID for a title using this repository's slave database.
 *
 * Similar in purpose to Title::getArticleID(), but a Title object cannot be
 * told which database to use, so the lookup is duplicated here.
 *
 * @param Title $title
 * @return bool|int|mixed 0 if $title is not a Title, otherwise the selectField() result
 */
protected function getArticleID( $title ) {
	if ( !( $title instanceof Title ) ) {
		return 0;
	}

	$conds = array(
		'page_namespace' => $title->getNamespace(),
		'page_title' => $title->getDBkey(),
	);

	return $this->getSlaveDB()->selectField( 'page', 'page_id', $conds, __METHOD__ );
}
261
/**
 * Look up many files at once: first in the image table, then (for searches
 * that specify a time) in the oldimage table, and finally via redirects.
 *
 * @param array $items Each entry is either a title accepted by
 *   File::normalizeTitle(), or an array with a 'title' entry plus the optional
 *   search parameters read here: 'time', 'private' and 'ignoreRedirect'
 * @param int $flags Bitfield; with FileRepo::NAME_AND_TIME_ONLY each result is
 *   an array with 'title' and 'timestamp' instead of a File object
 * @return array Map of (searched DB key => File object or title/timestamp array)
 */
public function findFiles( array $items, $flags = 0 ) {
	$found = array(); // map of (DB key => corresponding File) for matches

	$pending = array(); // map of (normalized DB key => search params)
	foreach ( $items as $item ) {
		$hasParams = is_array( $item );
		$title = File::normalizeTitle( $hasParams ? $item['title'] : $item );
		if ( $title ) {
			$pending[$title->getDBkey()] = $hasParams ? $item : array();
		}
	}

	// Decides whether a file satisfies one search's time/visibility constraints.
	// Names are compared by the callers, which also have to deal with redirects.
	$fileMatchesSearch = function ( File $file, array $search ) {
		$user = null;
		if ( !empty( $search['private'] ) && $search['private'] instanceof User ) {
			$user = $search['private'];
		}

		if ( !$file->exists() ) {
			return false;
		}
		if ( empty( $search['time'] ) ) {
			// No time requested: only the current version qualifies
			if ( $file->isOld() ) {
				return false;
			}
		} elseif ( $search['time'] !== $file->getTimestamp() ) {
			return false;
		}
		if ( empty( $search['private'] ) && $file->isDeleted( File::DELETED_FILE ) ) {
			return false;
		}

		return (bool)$file->userCan( File::DELETED_FILE, $user );
	};

	$that = $this;
	// Moves every row of $rows that answers a pending search from $open to $results
	$applyMatchingFiles = function ( ResultWrapper $rows, &$open, &$results )
		use ( $that, $fileMatchesSearch, $flags )
	{
		global $wgContLang;
		$info = $that->getInfo();
		$namesOnly = ( $flags & FileRepo::NAME_AND_TIME_ONLY );
		foreach ( $rows as $row ) {
			$file = $that->newFileFromRow( $row );
			// There must have been a search for this DB key, but title
			// capitalization may differ between the client and repo wikis.
			$candidates = array( strtr( $file->getName(), ' ', '_' ) );
			if ( !empty( $info['initialCapital'] ) ) {
				// Searches for "hi.png" and "Hi.png" should both get the "Hi.png" file
				$candidates[] = $wgContLang->lcfirst( $file->getName() );
			}
			foreach ( $candidates as $candidate ) {
				if ( !isset( $open[$candidate] ) ) {
					continue;
				}
				if ( !$fileMatchesSearch( $file, $open[$candidate] ) ) {
					continue;
				}
				if ( $namesOnly ) {
					$results[$candidate] = array(
						'title' => $candidate,
						'timestamp' => $file->getTimestamp()
					);
				} else {
					$results[$candidate] = $file;
				}
				unset( $open[$candidate] );
			}
		}
	};

	$dbr = $this->getSlaveDB();

	// Pass 1: current versions from the image table
	$imgNames = array();
	foreach ( array_keys( $pending ) as $dbKey ) {
		$imgNames[] = $this->getNameFromTitle( File::normalizeTitle( $dbKey ) );
	}
	if ( count( $imgNames ) > 0 ) {
		$res = $dbr->select(
			'image',
			LocalFile::selectFields(),
			array( 'img_name' => $imgNames ),
			__METHOD__
		);
		$applyMatchingFiles( $res, $pending, $found );
	}

	// Pass 2: old versions, only for still-open searches that name a time
	$oiConds = array(); // one AND-ed condition per file
	foreach ( $pending as $dbKey => $search ) {
		if ( !isset( $search['time'] ) ) {
			continue;
		}
		$oiConds[] = $dbr->makeList(
			array(
				'oi_name' => $this->getNameFromTitle( File::normalizeTitle( $dbKey ) ),
				'oi_timestamp' => $dbr->timestamp( $search['time'] )
			),
			LIST_AND
		);
	}
	if ( count( $oiConds ) > 0 ) {
		$res = $dbr->select(
			'oldimage',
			OldLocalFile::selectFields(),
			$dbr->makeList( $oiConds, LIST_OR ),
			__METHOD__
		);
		$applyMatchingFiles( $res, $pending, $found );
	}

	// Pass 3: whatever is still open may be a redirect to another file
	foreach ( $pending as $dbKey => $search ) {
		if ( !empty( $search['ignoreRedirect'] ) ) {
			continue;
		}

		$title = File::normalizeTitle( $dbKey );
		$redir = $this->checkRedirect( $title ); // hopefully hits memcached
		if ( !$redir || $redir->getNamespace() != NS_FILE ) {
			continue;
		}

		$file = $this->newFile( $redir );
		if ( !$file || !$fileMatchesSearch( $file, $search ) ) {
			continue;
		}

		$file->redirectedFrom( $title->getDBkey() );
		if ( $flags & FileRepo::NAME_AND_TIME_ONLY ) {
			$found[$dbKey] = array(
				'title' => $file->getTitle()->getDBkey(),
				'timestamp' => $file->getTimestamp()
			);
		} else {
			$found[$dbKey] = $file;
		}
	}

	return $found;
}
386
/**
 * Fetch the current files whose content has the given SHA-1 hash,
 * ordered by image name.
 *
 * @param string $hash A sha1 hash to look for
 * @return File[]
 */
function findBySha1( $hash ) {
	$rows = $this->getSlaveDB()->select(
		'image',
		LocalFile::selectFields(),
		array( 'img_sha1' => $hash ),
		__METHOD__,
		array( 'ORDER BY' => 'img_name' )
	);

	$files = array();
	foreach ( $rows as $row ) {
		$files[] = $this->newFileFromRow( $row );
	}
	$rows->free();

	return $files;
}
412
/**
 * Fetch the current files matching any of the given SHA-1 content hashes,
 * grouped by hash, using a single query.
 *
 * Overrides the generic FileRepo implementation for performance reasons.
 *
 * @param array $hashes An array of hashes
 * @return array Map of (hash => list of file objects with that hash)
 */
function findBySha1s( array $hashes ) {
	if ( count( $hashes ) === 0 ) {
		// Nothing was asked for
		return array();
	}

	$rows = $this->getSlaveDB()->select(
		'image',
		LocalFile::selectFields(),
		array( 'img_sha1' => $hashes ),
		__METHOD__,
		array( 'ORDER BY' => 'img_name' )
	);

	$filesByHash = array();
	foreach ( $rows as $row ) {
		$file = $this->newFileFromRow( $row );
		$filesByHash[$file->getSha1()][] = $file;
	}
	$rows->free();

	return $filesByHash;
}
445
/**
 * List files whose name begins with $prefix, ordered by name.
 *
 * @param string $prefix The prefix to search for
 * @param int $limit The maximum amount of files to return
 * @return array
 */
public function findFilesByPrefix( $prefix, $limit ) {
	$dbr = $this->getSlaveDB();

	$prefixCond = 'img_name ' . $dbr->buildLike( $prefix, $dbr->anyString() );
	$rows = $dbr->select(
		'image',
		LocalFile::selectFields(),
		$prefixCond,
		__METHOD__,
		array( 'ORDER BY' => 'img_name', 'LIMIT' => intval( $limit ) )
	);

	// Turn each row into a file object
	$matches = array();
	foreach ( $rows as $row ) {
		$matches[] = $this->newFileFromRow( $row );
	}

	return $matches;
}
474
/**
 * Obtain a DB_SLAVE database connection via wfGetDB()
 * @return DatabaseBase
 */
function getSlaveDB() {
	$dbr = wfGetDB( DB_SLAVE );

	return $dbr;
}
482
/**
 * Obtain a DB_MASTER database connection via wfGetDB()
 * @return DatabaseBase
 */
function getMasterDB() {
	$dbw = wfGetDB( DB_MASTER );

	return $dbw;
}
490
/**
 * Build a callback that returns a DB handle for a given index (DB_SLAVE/DB_MASTER)
 * @return Closure
 */
protected function getDBFactory() {
	$factory = function( $dbIndex ) {
		return wfGetDB( $dbIndex );
	};

	return $factory;
}
500
/**
 * Get a key on the primary cache for this repository.
 *
 * The parameters are the parts of the key, as for wfMemcKey(); this
 * implementation hands them straight to wfMemcKey(). Callers in this class
 * also check for a false result, meaning the repository's cache is not
 * accessible at this site.
 *
 * @return string
 */
function getSharedCacheKey( /*...*/ ) {
	$keyParts = func_get_args();
	$cacheKey = call_user_func_array( 'wfMemcKey', $keyParts );

	return $cacheKey;
}
513
/**
 * Purge the cached image redirect lookup for the given title
 *
 * @param Title $title Title of page
 * @return void
 */
function invalidateImageRedirect( Title $title ) {
	$cache = ObjectCache::getMainWANInstance();

	$memcKey = $this->getSharedCacheKey( 'image_redirect', md5( $title->getDBkey() ) );
	if ( !$memcKey ) {
		return;
	}

	// Delete with a hold-off period so the key stays purged long enough
	// that it is not refilled with a stale value read from a lagged slave.
	$cache->delete( $memcKey, 12 );
}
532
/**
 * Return information about the repository: the parent's info plus
 * a 'favicon' entry holding the expanded $wgFavicon URL.
 *
 * @return array
 * @since 1.22
 */
function getInfo() {
	global $wgFavicon;

	$localInfo = array(
		'favicon' => wfExpandUrl( $wgFavicon ),
	);

	return array_merge( parent::getInfo(), $localInfo );
}
546
/**
 * Write operation routed through skipWriteOperationIfSha1(): a no-op on
 * sha1 storage, otherwise the parent's store() with the same arguments.
 */
public function store( $srcPath, $dstZone, $dstRel, $flags = 0 ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
550
/**
 * Write operation routed through skipWriteOperationIfSha1(): a no-op on
 * sha1 storage, otherwise the parent's storeBatch() with the same arguments.
 */
public function storeBatch( array $triplets, $flags = 0 ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
554
/**
 * Write operation routed through skipWriteOperationIfSha1(): a no-op on
 * sha1 storage, otherwise the parent's cleanupBatch() with the same arguments.
 */
public function cleanupBatch( array $files, $flags = 0 ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
558
/**
 * Write operation routed through skipWriteOperationIfSha1(): a no-op on
 * sha1 storage, otherwise the parent's publish() with the same arguments.
 */
public function publish(
	$srcPath,
	$dstRel,
	$archiveRel,
	$flags = 0,
	array $options = array()
) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
568
/**
 * Write operation routed through skipWriteOperationIfSha1(): a no-op on
 * sha1 storage, otherwise the parent's publishBatch() with the same arguments.
 */
public function publishBatch( array $ntuples, $flags = 0 ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
572
/**
 * Write operation routed through skipWriteOperationIfSha1(): a no-op on
 * sha1 storage, otherwise the parent's delete() with the same arguments.
 */
public function delete( $srcRel, $archiveRel ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
576
/**
 * Write operation routed through skipWriteOperationIfSha1(): a no-op on
 * sha1 storage, otherwise the parent's deleteBatch() with the same arguments.
 */
public function deleteBatch( array $sourceDestPairs ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
580
/**
 * Run the parent implementation of a write operation, unless the storage is
 * sha1-based, in which case the operation is skipped and reported as good.
 *
 * @param string $function Name of the parent method to invoke
 * @param array $args Arguments to pass to that method
 * @return FileRepoStatus
 */
protected function skipWriteOperationIfSha1( $function, array $args ) {
	// Fail out first if the repo is read-only
	$this->assertWritableRepo();

	if ( !$this->hasSha1Storage() ) {
		return call_user_func_array( "parent::$function", $args );
	}

	wfDebug( __METHOD__ . ": skipped because storage uses sha1 paths\n" );

	return Status::newGood();
}
598 }