Followup r96479, move files to WikimediaMaintenance

author Sam Reed <reedy@users.mediawiki.org>

Wed, 7 Sep 2011 20:50:40 +0000 (20:50 +0000)

committer Sam Reed <reedy@users.mediawiki.org>

Wed, 7 Sep 2011 20:50:40 +0000 (20:50 +0000)
author Sam Reed <reedy@users.mediawiki.org>
Wed, 7 Sep 2011 20:50:40 +0000 (20:50 +0000)
committer Sam Reed <reedy@users.mediawiki.org>
Wed, 7 Sep 2011 20:50:40 +0000 (20:50 +0000)
diff --git a/maintenance/storage/storageTypeStatsDiff.py b/maintenance/storage/storageTypeStatsDiff.py

deleted file mode 100755 (executable)

index 90a9e1c..0000000
--- a/maintenance/storage/storageTypeStatsDiff.py
+++ /dev/null
@@ -1,113 +0,0 @@
-#!/usr/bin/python
-
-
-"""
-
-    For more detail, see http://wikitech.wikimedia.org/view/Text_storage_data
-
-    reads in two files which should contain the output of storageTypeStatsSum.py
-    Parses them both and calculates the difference for each storage type
-    prints this to stdout.
-
-    For best results, give the old and new files their dates for names, eg:
-    ben@fenari:~/storageStats$ ./storageTypeStatsDiff.py 2010-02-18 2011-08-31
-
-    Example content:
-
-ben@fenari:~/storageStats$ cat 2010-02-18
-Results:
-       Count    Type
-------------------------------------------
-           9    0,external/simple pointer
-         435    0/[none]
-     1482941    [none]/[none]
-      968957    gzip/[none]
-      178234    object,external/simple pointer
-        1800    object,utf-8/[none]
-    17076928    utf-8,gzip/[none]
-        1269    utf-8/[none]
-all done!
-
-ben@fenari:~/storageStats$ cat 2011-08-31
-Results:
-       Count    Type
-------------------------------------------
-           9    0,external/simple pointer
-        1435    0/[none]
-     1002341    [none]/[none]
-     1234212    object,external/simple pointer
-         213    object,external/blob
-          20    object,utf-8/[none]
-      123428    utf-8,gzip/[none]
-         123    utf-8/[none]
-all done!
-
-"""
-
-
-import re
-import optparse
-
-##
-##  set up argument parsing.
-usage = "usage: %prog <old-stats-file> <new-stats-file>"
-desc = "Calculate the difference between two files containing storageTypeStatsSum.py output"
-parser = optparse.OptionParser(usage=usage, description=desc)
-(opts, args) = parser.parse_args()
-# Require exactly two arguments
-if len(args) != 2:
-    print "Two files needed."
-    parser.print_help()
-    exit()
-
-try:
-    oldfile=open(args[0], 'r')
-    newfile=open(args[1], 'r')
-except IOError, e:
-    print "IOError trying to open %s or %s: %s\n" % (args[0], args[1], e)
-    exit(1)
-
-# match only the actual value / key lines; ignore everything else
-valueline = re.compile("^ *(?P<val>\d+) *(?P<desc>.*)$")
-
-files={}
-# ok, parse the files and collect stats!
-for file in (oldfile, newfile):
-    stats = {}
-    for line in file:
-        match = valueline.match(line)
-        if match:
-            stats[match.group('desc')] = int(match.group('val'))
-    #stats collected for one file, save it to the files dict
-    files[file.name] = stats
-
-# calculate the difference
-diff = {} # contains numbers keyed on storage types
-allkeys = []
-# collect keys from both sets in case they don't match
-for stats in files.keys():
-    # get the union of allkeys and this file's stats keys
-    allkeys = list( set(allkeys) | set(files[stats].keys()) )
-for key in allkeys:
-    try:
-        diff[key] = files[newfile.name][key] - files[oldfile.name][key]
-    except KeyError:
-        # this happens when a key only exists in one set
-        diff[key] = 'n/a'
-
-# print out results
-print "%12s %12s %12s   %s" % (oldfile.name, newfile.name, 'Diff', 'Type')
-print "---------------------------------------------------------------------"
-for key in sorted(allkeys):
-    try:
-        oldval = files[oldfile.name][key]
-    except KeyError:
-        oldval = 'n/a'
-    try:
-        newval = files[newfile.name][key]
-    except KeyError:
-        newval = 'n/a'
-    diffnum = diff[key]
-    name = key
-    print "%12s %12s %12s   %s" % (oldval, newval, diffnum, name)
-
diff --git a/maintenance/storage/storageTypeStatsSum.py b/maintenance/storage/storageTypeStatsSum.py

deleted file mode 100755 (executable)

index b072657..0000000
--- a/maintenance/storage/storageTypeStatsSum.py
+++ /dev/null
@@ -1,113 +0,0 @@
-#!/usr/bin/python
-
-
-"""
-
-    For more detail, see http://wikitech.wikimedia.org/view/Text_storage_data
-
-
-    reads in a file which should contain the output of
-    ben@hume:~$ /home/w/bin/foreachwiki maintenance/storage/storageTypeStats.php > /tmp/storageTypeStats.log
-    Parses it and sums up the values for all wikis.
-    prints this sum to stdout.
-
-    Example content:
-
-ben@fenari:~/storageStats$ cat sample_output.txt
------------------------------------------------------------------
-aawiki
------------------------------------------------------------------
-aawiki:  Using bin size of 100
-aawiki:  0^M1000^M2000^M3000^M4000^M5000^M6000^M7000^M8000^M9000^M10000^M
-aawiki:  
-aawiki:  Flags                         Class                                   Count               old_id range                 
-aawiki:  ------------------------------------------------------------------------------------------------------------------------
-aawiki:  gzip                          [none]                                  4568                0             - 4700         
-aawiki:  [none]                        [none]                                  1615                4600          - 6300         
-aawiki:  utf-8,gzip                    [none]                                  1883                5300          - 8300         
-aawiki:  external,utf-8                CGZ pointer                             626                 6200          - 10300        
-aawiki:  external,utf-8                DHB pointer                             368                 9100          - 10300        
-aawiki:  utf-8,gzip,external           simple pointer                          975                 8200          - 10400        
-aawiki:  external,utf8                 DHB pointer                             211                 9400          - 10200        
------------------------------------------------------------------
-aawikibooks
------------------------------------------------------------------
-aawikibooks:  Using bin size of 100
-aawikibooks:  0^M1000^M2000^M3000^M
-aawikibooks:  
-aawikibooks:  Flags                         Class                                   Count               old_id range                 
-aawikibooks:  ------------------------------------------------------------------------------------------------------------------------
-aawikibooks:  [none]                        [none]                                  881                 0             - 1000         
-aawikibooks:  external,utf-8                CGZ pointer                             187                 0             - 3400         
-aawikibooks:  external,utf-8                DHB pointer                             34                  3200          - 3400         
-aawikibooks:  object                        historyblobcurstub                      898                 900           - 1900         
-aawikibooks:  utf-8,gzip                    [none]                                  900                 1800          - 2900         
-aawikibooks:  utf-8,gzip,external           simple pointer                          431                 2800          - 3400         
-aawikibooks:  external,utf8                 DHB pointer                             25                  3300          - 3400         
-
-"""
-
-
-import re
-import optparse
-
-##
-##  set up argument parsing.  Require --input (or -i) and a filename.
-usage = "usage: %prog <input>"
-desc = """Sum the storage types across all wikis.  The input file should
-contain the output of:
-   foreachwiki maintenance/storage/storageTypeStats.php
-"""
-
-parser = optparse.OptionParser(usage=usage, description=desc)
-(opts, args) = parser.parse_args()
-if len(args) != 1:
-    print "I can't do anything without a file to parse. Sorry!"
-    parser.print_help()
-    exit(1)
-
-input = args[0]
-
-try:
-    file=open(input, 'r')
-
-    # create a bunch of regexes to match various sections of the file
-    # a section starts with nothing on the line but the name of the wiki db
-    #aawikibooks
-    start_section = re.compile("^(?P<dbname>[a-z0-9_]+)$")
-    #aawikibooks:  external,utf-8     DHB pointer      34    3200    - 3400
-    counter = re.compile("^[a-z0-9_]*: *(?P<flags>[^ ]+)  +(?P<class>[^ ]+ [^ ]*)  +(?P<count>\d+)  +.*")
-
-    # create a bunch of counters
-    wiki_count=0
-    content_counters = dict()
-
-    # ok, parse the file and collect stats!
-    for line in file:
-        match = start_section.match(line)
-        if match:
-            # this isn't actually used yet, but is in here for when we 
-            # want more interesting stats and collect per-db
-            wiki_count += 1
-            db_name=match.group('dbname')
-        match = counter.match(line)
-        if match:
-            # sum all unique class,flags combinations
-            key = "%s/%s" % (match.group('flags'), match.group('class'))
-            try:
-                content_counters[key] += int(match.group('count'))
-            except KeyError:
-                content_counters[key] = int(match.group('count'))
-
-
-except IOError, e:
-    print "omg io error %s!" % e
-    raise e
-
-print "Results:"
-print "       Count    Type"
-print "------------------------------------------"
-for key in sorted(content_counters.keys()):
-    print "%12d    %s" % (content_counters[key], key)
-print "all done!"
-
author	Sam Reed <reedy@users.mediawiki.org>
	Wed, 7 Sep 2011 20:50:40 +0000 (20:50 +0000)
committer	Sam Reed <reedy@users.mediawiki.org>
	Wed, 7 Sep 2011 20:50:40 +0000 (20:50 +0000)
maintenance/storage/storageTypeStatsDiff.py	[deleted file]	patch \| blob \| history
maintenance/storage/storageTypeStatsSum.py	[deleted file]	patch \| blob \| history