Move addWiki, dumpInterwiki, ourusers, rebuildInterwiki, renameWiki and Site to Wikim...
[lhc/web/wiklou.git] / maintenance / storage / storageTypeStatsSum.py
1 #!/usr/bin/python
2
3
4 """
5
6 For more detail, see http://wikitech.wikimedia.org/view/Text_storage_data
7
8
9 Reads in a file which should contain the output of:
10 ben@hume:~$ /home/w/bin/foreachwiki maintenance/storage/storageTypeStats.php > /tmp/storageTypeStats.log
11 Parses it and sums up the values for all wikis.
12 Prints this sum to stdout.
13
14 Example content:
15
16 ben@fenari:~/storageStats$ cat sample_output.txt
17 -----------------------------------------------------------------
18 aawiki
19 -----------------------------------------------------------------
20 aawiki: Using bin size of 100
21 aawiki: 0^M1000^M2000^M3000^M4000^M5000^M6000^M7000^M8000^M9000^M10000^M
22 aawiki:
23 aawiki: Flags Class Count old_id range
24 aawiki: ------------------------------------------------------------------------------------------------------------------------
25 aawiki: gzip [none] 4568 0 - 4700
26 aawiki: [none] [none] 1615 4600 - 6300
27 aawiki: utf-8,gzip [none] 1883 5300 - 8300
28 aawiki: external,utf-8 CGZ pointer 626 6200 - 10300
29 aawiki: external,utf-8 DHB pointer 368 9100 - 10300
30 aawiki: utf-8,gzip,external simple pointer 975 8200 - 10400
31 aawiki: external,utf8 DHB pointer 211 9400 - 10200
32 -----------------------------------------------------------------
33 aawikibooks
34 -----------------------------------------------------------------
35 aawikibooks: Using bin size of 100
36 aawikibooks: 0^M1000^M2000^M3000^M
37 aawikibooks:
38 aawikibooks: Flags Class Count old_id range
39 aawikibooks: ------------------------------------------------------------------------------------------------------------------------
40 aawikibooks: [none] [none] 881 0 - 1000
41 aawikibooks: external,utf-8 CGZ pointer 187 0 - 3400
42 aawikibooks: external,utf-8 DHB pointer 34 3200 - 3400
43 aawikibooks: object historyblobcurstub 898 900 - 1900
44 aawikibooks: utf-8,gzip [none] 900 1800 - 2900
45 aawikibooks: utf-8,gzip,external simple pointer 431 2800 - 3400
46 aawikibooks: external,utf8 DHB pointer 25 3300 - 3400
47
48 """
49
50
51 import re
52 import optparse
53
##
## Set up argument parsing. The script takes exactly one positional
## argument: the name of the file to parse. There are no options.
usage = "usage: %prog <input>"
desc = """Sum the storage types across all wikis. The input file should
contain the output of:
foreachwiki maintenance/storage/storageTypeStats.php
"""

# Matches one row of a wiki's stats table, for example:
#   aawikibooks: external,utf-8   DHB pointer   34   3200 - 3400
# The class column may hold one word ("[none]") or two ("DHB pointer"). A
# one-word class is captured together with the single space that follows it.
# NOTE(review): because the class group accepts an optional second word, a
# one-word class must be separated from the count column by more than one
# space, otherwise the count is absorbed into the class -- confirm against
# the real storageTypeStats.php output.
counter = re.compile(
    r"^[a-z0-9_]*: *(?P<flags>[^ ]+) +"
    r"(?P<class>[^ ]+ [^ ]*) +(?P<count>\d+) +.*"
)


def sum_storage_types(lines):
    """Sum the row counts for every unique flags/class combination.

    lines -- any iterable of text lines (an open file works), in the format
             printed by storageTypeStats.php for one or more wikis.

    Returns a dict mapping "<flags>/<class>" to the total count over all
    wikis. Lines that are not table rows (separators, wiki names, progress
    output, column headings) are ignored.
    """
    content_counters = {}
    for line in lines:
        match = counter.match(line)
        if not match:
            continue
        key = "%s/%s" % (match.group('flags'), match.group('class'))
        content_counters[key] = (
            content_counters.get(key, 0) + int(match.group('count')))
    return content_counters


def format_results(content_counters):
    """Return the report as a list of output lines, sorted by type key."""
    report = [
        "Results:",
        " Count Type",
        "------------------------------------------",
    ]
    for key in sorted(content_counters):
        report.append("%12d %s" % (content_counters[key], key))
    report.append("all done!")
    return report


def main():
    """Parse the command line, sum the given file and print the report.

    Exits with status 1 when the argument count is wrong. An IOError while
    opening or reading the file is reported on stdout and then re-raised.
    """
    parser = optparse.OptionParser(usage=usage, description=desc)
    (_opts, args) = parser.parse_args()
    if len(args) != 1:
        print("I can't do anything without a file to parse. Sorry!")
        parser.print_help()
        # Same effect as the interactive exit() helper, without depending
        # on the site module having installed it.
        raise SystemExit(1)

    path = args[0]
    try:
        # The with-block guarantees the file is closed, even on error.
        with open(path, 'r') as stats_file:
            content_counters = sum_storage_types(stats_file)
    except IOError as e:
        print("omg io error %s!" % e)
        # A bare raise keeps the original traceback.
        raise

    for report_line in format_results(content_counters):
        print(report_line)


if __name__ == "__main__":
    main()
113