Classify rows with old_flags='object,utf-8' (there are 1800 in enwiki).
[lhc/web/wiklou.git] / maintenance / storage / storageTypeStats.php
1 <?php
2
3 require_once( dirname(__FILE__).'/../Maintenance.php' );
4
/**
 * Maintenance script that reports how rows of the text table are stored.
 *
 * The table is scanned in fixed-size old_id bins. Within each bin, rows are
 * grouped by old_flags and by a storage "class" computed in SQL, and the
 * per-group row counts are accumulated. A table of flags, class, row count
 * and approximate old_id range (rounded to bin boundaries) is printed at
 * the end.
 */
class StorageTypeStats extends Maintenance {
	/**
	 * Scan the text table and print the storage type statistics.
	 *
	 * Exits with status 1 if the text table has no rows.
	 */
	public function execute() {
		$dbr = wfGetDB( DB_SLAVE );

		$endId = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );
		if ( !$endId ) {
			echo "No text rows!\n";
			exit( 1 );
		}

		// Bin size is 10^(digits - 4) with a floor of 100, i.e. the table is
		// split into roughly 1000-10000 bins once it has more than 10^5 ids.
		$binSize = intval( pow( 10, floor( log10( $endId ) ) - 3 ) );
		if ( $binSize < 100 ) {
			$binSize = 100;
		}
		echo "Using bin size of $binSize\n";

		// $stats[flags][class] => array( 'count' => ..., 'first' => ..., 'last' => ... )
		$stats = array();

		// SQL expression that labels each row:
		//  - rows flagged "external" are labelled by the shape of the DB:// URL
		//    in old_text (32 hex digit suffix, 1-6 digit suffix, no suffix,
		//    or anything else);
		//  - rows flagged "object" are labelled with the third colon-separated
		//    field of old_text with double quotes trimmed (for a PHP-serialized
		//    object, O:<len>:"<class>":..., that is the class name);
		//  - everything else is labelled "[none]".
		// The heredoc terminator must stay in column 0 for older PHP versions.
		$classSql = <<<SQL
IF(old_flags LIKE '%external%',
	IF(old_text REGEXP '^DB://[[:alnum:]]+/[0-9]+/[0-9a-f]{32}$',
		'CGZ pointer',
		IF(old_text REGEXP '^DB://[[:alnum:]]+/[0-9]+/[0-9]{1,6}$',
			'DHB pointer',
			IF(old_text REGEXP '^DB://[[:alnum:]]+/[0-9]+$',
				'simple pointer',
				'UNKNOWN pointer'
			)
		)
	),
	IF(old_flags LIKE '%object%',
		TRIM('"' FROM SUBSTRING_INDEX(SUBSTRING_INDEX(old_text, ':', 3), ':', -1)),
		'[none]'
	)
)
SQL;

		// Each bin covers old_id in [$rangeStart, $rangeStart + $binSize).
		// The condition must be "<=" rather than "<": when $endId is an exact
		// multiple of $binSize, the bin that contains old_id == $endId starts
		// at $endId itself and would otherwise never be scanned.
		for ( $rangeStart = 0; $rangeStart <= $endId; $rangeStart += $binSize ) {
			// Progress indicator, refreshed on the same line every 10 bins.
			if ( $rangeStart / $binSize % 10 == 0 ) {
				echo "$rangeStart\r";
			}
			$res = $dbr->select(
				'text',
				array(
					'old_flags',
					"$classSql AS class",
					'COUNT(*) as count',
				),
				array(
					'old_id >= ' . intval( $rangeStart ),
					'old_id < ' . intval( $rangeStart + $binSize )
				),
				__METHOD__,
				array( 'GROUP BY' => 'old_flags, class' )
			);

			foreach ( $res as $row ) {
				$flags = $row->old_flags;
				if ( $flags === '' ) {
					$flags = '[none]';
				}
				$class = $row->class;
				$count = $row->count;
				if ( !isset( $stats[$flags][$class] ) ) {
					// First bin in which this flags/class pair was seen.
					$stats[$flags][$class] = array(
						'count' => 0,
						'first' => $rangeStart,
						'last' => 0
					);
				}
				$entry =& $stats[$flags][$class];
				$entry['count'] += $count;
				// Exclusive upper bound of the latest bin containing this pair.
				$entry['last'] = max( $entry['last'], $rangeStart + $binSize );
				// Drop the reference so later assignments to $entry cannot
				// overwrite the stats array by accident.
				unset( $entry );
			}
		}
		echo "\n\n";

		$format = "%-29s %-39s %-19s %-29s\n";
		printf( $format, "Flags", "Class", "Count", "old_id range" );
		echo str_repeat( '-', 120 ) . "\n";
		foreach ( $stats as $flags => $flagStats ) {
			foreach ( $flagStats as $class => $entry ) {
				printf( $format, $flags, $class, $entry['count'],
					sprintf( "%-13d - %-13d", $entry['first'], $entry['last'] ) );
			}
		}
	}
}
95
// Script bootstrap: $maintClass names the Maintenance subclass defined in this
// file. Presumably the file included via DO_MAINTENANCE reads it to
// instantiate and run that class -- confirm against Maintenance.php.
96 $maintClass = 'StorageTypeStats';
97 require_once( DO_MAINTENANCE );
98