From f2d4091af21fe9692931874d6b1c1e7056ced390 Mon Sep 17 00:00:00 2001 From: Sam Reed Date: Wed, 7 Sep 2011 20:50:40 +0000 Subject: [PATCH] Followup r96479, move files to WikimediaMaintenance Added missing svn:eol-style native Files still need updating to point back to the MW maintenance dir --- maintenance/storage/storageTypeStatsDiff.py | 113 -------------------- maintenance/storage/storageTypeStatsSum.py | 113 -------------------- 2 files changed, 226 deletions(-) delete mode 100755 maintenance/storage/storageTypeStatsDiff.py delete mode 100755 maintenance/storage/storageTypeStatsSum.py diff --git a/maintenance/storage/storageTypeStatsDiff.py b/maintenance/storage/storageTypeStatsDiff.py deleted file mode 100755 index 90a9e1cfd9..0000000000 --- a/maintenance/storage/storageTypeStatsDiff.py +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/python - - -""" - - For more detail, see http://wikitech.wikimedia.org/view/Text_storage_data - - reads in two files which should contain the output of storageTypeStatsSum.py - Parses them both and calculates the difference for each storage type - prints this to stdout. - - For best results, give the old and new files their dates for names, eg: - ben@fenari:~/storageStats$ ./storageTypeStatsDiff.py 2010-02-18 2011-08-31 - - Example content: - -ben@fenari:~/storageStats$ cat 2010-02-18 -Results: - Count Type ------------------------------------------- - 9 0,external/simple pointer - 435 0/[none] - 1482941 [none]/[none] - 968957 gzip/[none] - 178234 object,external/simple pointer - 1800 object,utf-8/[none] - 17076928 utf-8,gzip/[none] - 1269 utf-8/[none] -all done! - -ben@fenari:~/storageStats$ cat 2011-08-31 -Results: - Count Type ------------------------------------------- - 9 0,external/simple pointer - 1435 0/[none] - 1002341 [none]/[none] - 1234212 object,external/simple pointer - 213 object,external/blob - 20 object,utf-8/[none] - 123428 utf-8,gzip/[none] - 123 utf-8/[none] -all done! 
- -""" - - -import re -import optparse - -## -## set up argument parsing. -usage = "usage: %prog " -desc = "Calculate the difference between two files containing storageTypeStatsSum.py output" -parser = optparse.OptionParser(usage=usage, description=desc) -(opts, args) = parser.parse_args() -# Require exactly two arguments -if len(args) != 2: - print "Two files needed." - parser.print_help() - exit() - -try: - oldfile=open(args[0], 'r') - newfile=open(args[1], 'r') -except IOError, e: - print "IOError trying to open %s or %s: %s\n" % (args[0], args[1], e) - exit(1) - -# match only the actual value / key lines; ignore everything else -valueline = re.compile("^ *(?P<val>\d+) *(?P<desc>.*)$") - -files={} -# ok, parse the files and collect stats! -for file in (oldfile, newfile): - stats = {} - for line in file: - match = valueline.match(line) - if match: - stats[match.group('desc')] = int(match.group('val')) - #stats collected for one file, save it to the files dict - files[file.name] = stats - -# calculate the difference -diff = {} # contains numbers keyed on storage types -allkeys = [] -# collect keys from both sets in case they don't match -for stats in files.keys(): - # get the union of allkeys and this file's stats keys - allkeys = list( set(allkeys) | set(files[stats].keys()) ) -for key in allkeys: - try: - diff[key] = files[newfile.name][key] - files[oldfile.name][key] - except KeyError: - # this happens when a key only exists in one set - diff[key] = 'n/a' - -# print out results -print "%12s %12s %12s %s" % (oldfile.name, newfile.name, 'Diff', 'Type') -print "---------------------------------------------------------------------" -for key in sorted(allkeys): - try: - oldval = files[oldfile.name][key] - except KeyError: - oldval = 'n/a' - try: - newval = files[newfile.name][key] - except KeyError: - newval = 'n/a' - diffnum = diff[key] - name = key - print "%12s %12s %12s %s" % (oldval, newval, diffnum, name) - diff --git a/maintenance/storage/storageTypeStatsSum.py 
b/maintenance/storage/storageTypeStatsSum.py deleted file mode 100755 index b07265758b..0000000000 --- a/maintenance/storage/storageTypeStatsSum.py +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/python - - -""" - - For more detail, see http://wikitech.wikimedia.org/view/Text_storage_data - - - reads in a file which should contain the output of - ben@hume:~$ /home/w/bin/foreachwiki maintenance/storage/storageTypeStats.php > /tmp/storageTypeStats.log - Parses it and sums up the values for all wikis. - prints this sum to stdout. - - Example content: - -ben@fenari:~/storageStats$ cat sample_output.txt ------------------------------------------------------------------ -aawiki ------------------------------------------------------------------ -aawiki: Using bin size of 100 -aawiki: 0^M1000^M2000^M3000^M4000^M5000^M6000^M7000^M8000^M9000^M10000^M -aawiki: -aawiki: Flags Class Count old_id range -aawiki: ------------------------------------------------------------------------------------------------------------------------ -aawiki: gzip [none] 4568 0 - 4700 -aawiki: [none] [none] 1615 4600 - 6300 -aawiki: utf-8,gzip [none] 1883 5300 - 8300 -aawiki: external,utf-8 CGZ pointer 626 6200 - 10300 -aawiki: external,utf-8 DHB pointer 368 9100 - 10300 -aawiki: utf-8,gzip,external simple pointer 975 8200 - 10400 -aawiki: external,utf8 DHB pointer 211 9400 - 10200 ------------------------------------------------------------------ -aawikibooks ------------------------------------------------------------------ -aawikibooks: Using bin size of 100 -aawikibooks: 0^M1000^M2000^M3000^M -aawikibooks: -aawikibooks: Flags Class Count old_id range -aawikibooks: ------------------------------------------------------------------------------------------------------------------------ -aawikibooks: [none] [none] 881 0 - 1000 -aawikibooks: external,utf-8 CGZ pointer 187 0 - 3400 -aawikibooks: external,utf-8 DHB pointer 34 3200 - 3400 -aawikibooks: object historyblobcurstub 898 900 - 1900 -aawikibooks: 
utf-8,gzip [none] 900 1800 - 2900 -aawikibooks: utf-8,gzip,external simple pointer 431 2800 - 3400 -aawikibooks: external,utf8 DHB pointer 25 3300 - 3400 - -""" - - -import re -import optparse - -## -## set up argument parsing. Require --input (or -i) and a filename. -usage = "usage: %prog " -desc = """Sum the storage types across all wikis. The input file should -contain the output of: - foreachwiki maintenance/storage/storageTypeStats.php -""" - -parser = optparse.OptionParser(usage=usage, description=desc) -(opts, args) = parser.parse_args() -if len(args) != 1: - print "I can't do anything without a file to parse. Sorry!" - parser.print_help() - exit(1) - -input = args[0] - -try: - file=open(input, 'r') - - # create a bunch of regexes to match various sections of the file - # a section starts with nothing on the line but the name of the wiki db - #aawikibooks - start_section = re.compile("^(?P<dbname>[a-z0-9_]+)$") - #aawikibooks: external,utf-8 DHB pointer 34 3200 - 3400 - counter = re.compile("^[a-z0-9_]*: *(?P<flags>[^ ]+) +(?P<class>[^ ]+ [^ ]*) +(?P<count>\d+) +.*") - - # create a bunch of counters - wiki_count=0 - content_counters = dict() - - # ok, parse the file and collect stats! - for line in file: - match = start_section.match(line) - if match: - # this isn't actually used yet, but is in here for when we - # want more interesting stats and collect per-db - wiki_count += 1 - db_name=match.group('dbname') - match = counter.match(line) - if match: - # sum all unique class,flags combinations - key = "%s/%s" % (match.group('flags'), match.group('class')) - try: - content_counters[key] += int(match.group('count')) - except KeyError: - content_counters[key] = int(match.group('count')) - - -except IOError, e: - print "omg io error %s!" % e - raise e - -print "Results:" -print " Count Type" -print "------------------------------------------" -for key in sorted(content_counters.keys()): - print "%12d %s" % (content_counters[key], key) -print "all done!" - -- 2.20.1