#!/usr/bin/env python
"""Sum the storage type statistics across all wikis.

For more detail, see http://wikitech.wikimedia.org/view/Text_storage_data

Reads in a file which should contain the output of

    ben@hume:~$ /home/w/bin/foreachwiki maintenance/storage/storageTypeStats.php > /tmp/storageTypeStats.log

Parses it and sums up the values for all wikis, then prints this sum to
stdout.

Sample input (NOTE(review): runs of whitespace in this sample appear to have
been collapsed, and the bare wiki-name lines between the separators were
restored from the parser's description of a section start -- confirm against
a real log):

    ben@fenari:~/storageStats$ cat sample_output.txt
    -----------------------------------------------------------------
    aawiki
    -----------------------------------------------------------------
    aawiki: Using bin size of 100
    aawiki: 0^M1000^M2000^M3000^M4000^M5000^M6000^M7000^M8000^M9000^M10000^M
    aawiki: Flags Class Count old_id range
    aawiki: ------------------------------------------------------------------------------------------------------------------------
    aawiki: gzip [none] 4568 0 - 4700
    aawiki: [none] [none] 1615 4600 - 6300
    aawiki: utf-8,gzip [none] 1883 5300 - 8300
    aawiki: external,utf-8 CGZ pointer 626 6200 - 10300
    aawiki: external,utf-8 DHB pointer 368 9100 - 10300
    aawiki: utf-8,gzip,external simple pointer 975 8200 - 10400
    aawiki: external,utf8 DHB pointer 211 9400 - 10200
    -----------------------------------------------------------------
    aawikibooks
    -----------------------------------------------------------------
    aawikibooks: Using bin size of 100
    aawikibooks: 0^M1000^M2000^M3000^M
    aawikibooks: Flags Class Count old_id range
    aawikibooks: ------------------------------------------------------------------------------------------------------------------------
    aawikibooks: [none] [none] 881 0 - 1000
    aawikibooks: external,utf-8 CGZ pointer 187 0 - 3400
    aawikibooks: external,utf-8 DHB pointer 34 3200 - 3400
    aawikibooks: object historyblobcurstub 898 900 - 1900
    aawikibooks: utf-8,gzip [none] 900 1800 - 2900
    aawikibooks: utf-8,gzip,external simple pointer 431 2800 - 3400
    aawikibooks: external,utf8 DHB pointer 25 3300 - 3400
"""

import optparse
import re
import sys

## set up argument parsing. Require exactly one positional argument: the
## name of the file to parse.
usage = "usage: %prog <input>"
desc = """Sum the storage types across all wikis. The input file should
contain the output of:
foreachwiki maintenance/storage/storageTypeStats.php
"""

# create a bunch of regexes to match various sections of the file
# a section starts with nothing on the line but the name of the wiki db
# (not consulted by the summing loop yet; kept for when we want more
# interesting stats and collect per-db)
start_section = re.compile(r"^(?P<dbname>[a-z0-9_]+)$")
#aawikibooks: external,utf-8 DHB pointer 34 3200 - 3400
# The class group always consumes one space after its first word, so a
# one-word class such as "[none]" is captured with a trailing space.
counter = re.compile(r"^[a-z0-9_]*: *(?P<flags>[^ ]+) +(?P<class>[^ ]+ [^ ]*) +(?P<count>\d+) +.*")


def sum_storage_types(lines):
    """Sum the Count column for every unique flags/class combination.

    lines -- any iterable of text lines (an open file works).

    Returns a dict mapping "<flags>/<class>" to the summed count over all
    lines that match the `counter` pattern; every other line is ignored.
    """
    content_counters = dict()
    for line in lines:
        match = counter.match(line)
        if not match:
            continue
        # sum all unique class,flags combinations
        key = "%s/%s" % (match.group('flags'), match.group('class'))
        content_counters[key] = content_counters.get(key, 0) + int(match.group('count'))
    return content_counters


def format_report(content_counters):
    """Return the report lines: a separator, then one line per key, sorted."""
    # NOTE(review): any heading printed before the separator was not visible
    # in the reviewed text and is therefore not reproduced here.
    report = ["------------------------------------------"]
    for key in sorted(content_counters):
        report.append("%12d %s" % (content_counters[key], key))
    return report


def main(argv=None):
    """Parse the command line, sum the given file, and print the totals.

    argv -- argument list to parse; None means sys.argv[1:].

    Returns the process exit status: 0 on success, 1 when no single input
    file was named or the file could not be read.
    """
    parser = optparse.OptionParser(usage=usage, description=desc)
    (opts, args) = parser.parse_args(argv)
    # NOTE(review): the original guard and its follow-up lines were not
    # visible; a single positional filename is assumed from the usage string.
    if len(args) != 1:
        print("I can't do anything without a file to parse. Sorry!")
        parser.print_help()
        return 1

    # ok, parse the file and collect stats!
    try:
        with open(args[0]) as infile:
            content_counters = sum_storage_types(infile)
    except IOError as e:
        print("omg io error %s!" % e)
        return 1

    for report_line in format_report(content_counters):
        print(report_line)
    return 0


if __name__ == "__main__":
    sys.exit(main())