Commits

Anonymous committed a153306

Created repo, still needs some testing

Files changed (6)

File archive.cron

+#!/bin/sh
+# Run this script from cron to backup the WineWiki and its logs
+# It is currently configured to be run from the parent dir of "wiki/"
+# "tar-exclude" is the only other file needed at this location
+
+CURLOG="wiki/data/event-log"
+CURDATE=$(date +%Y%m%d)
+
+# Create proper titles for the archives using `date`
+TARNAME="wiki-data-$CURDATE.tar.gz"
+IDNAME="wiki-users-$CURDATE.tar.gz"
+
+# Now make the user and content tarballs using the right exclusions
+tar -cvzf $TARNAME -X tar-exclude wiki/
+tar -cvzf $IDNAME wiki/data/user/
+
+# If the current event-log is large enough, shard and archive the top
+perl split-logs.pl -c "3 months" $CURLOG
+
+# The current event-log should probably just be compressed alone
+gzip wiki-log*
+gzip -c $CURLOG > wiki-log-current.gz

File clean-wiki.cron

+#!/bin/sh
+# Run this script regularly from cron to tidy up the Wine Wiki
+# It is currently configured to be run from the parent dir of "wiki/"
+
+# Regularly deleting the cache might not be wise
+# moin --config-dir=wiki/config \
+#     --wiki-url=http://wiki.winehq.org/ maint cleancache
+
+# But cleaning out deleted pages seems like a good idea ...
+moin --config-dir=wiki/config --wiki-url=http://wiki.winehq.org/ \
+maint cleanpage > cleanpage.sh
+
+# ... but just in case, a grace period for restoring pages
+# First extract absolute paths from cleanpage and recent files
+sed "s;[^/]*\(/.*/\)[^/]*;\1;" cleanpage.sh | sort > paths.tmp
+find wiki/data/pages -mtime -365 | sed "s;\(.*\);$PWD/\1;" | sort \
+> recent.tmp
+# Then isolate the common files (comm expects sorted input)
+comm -12 paths.tmp recent.tmp > retain.tmp
+# And comment out those lines in cleanpage (using a perl script)
+perl comment-out.pl cleanpage.sh retain.tmp
+
+# Then run cleanpage, wipe the junk directories, and make fresh ones
+sh cleanpage.sh
+rm -rf wiki/data/trash
+rm -rf wiki/data/deleted
+rm cleanpage.sh
+mkdir wiki/data/trash
+mkdir wiki/data/deleted
+
+# Now to throw out any duplicate accounts
+moin --config-dir=wiki/config --wiki-url=http://wiki.winehq.org/ \
+account check --usersunique --save
+moin --config-dir=wiki/config --wiki-url=http://wiki.winehq.org/ \
+account check --emailsunique --save
+
+# For MoinMoin 1.5, abandoned accounts were found by ".trail" mod-times
+# Later versions do away with trail files, so there's no simple record
+# of last login :(

File comment-out.pl

+#!/usr/bin/perl
+
+# This script comments out lines in a first file based on patterns in
+# a second file
+
+# It's written in Perl because there's no way to pipe to sed -i
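+
+# Usage (as called from clean-wiki.cron and sweep-once.sh):
+#   perl comment-out.pl <script-to-edit> <pattern-file>
+#   e.g.  perl comment-out.pl cleanpage.sh retain.tmp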
+
+use warnings;
+no warnings 'uninitialized';
+use strict;
+
+# Pop the parameters from the arguments, define script globals
+my $list = pop @ARGV;
+my $target = pop @ARGV;
+my %patterns;
+
+# Now open the target script, the pattern list, and a temporary output
+open my $script, "<", $target or die $!;
+open my $listfh, "<", $list or die $!;
+open my $new, ">", "$target.tmp" or die $!;
+
+# Load the list of patterns to be retained into a hash
+while (<$listfh>) { chomp; $patterns{$_}++; }
+# Now begin scanning the target script
+while (my $line = <$script>) {
+
+	# Check every retained pattern for a possible match
+	for my $pattern (keys %patterns) {
+
+		# If a match, ensure $line is commented out, exit the loop
+		if ($line =~ m;\Q$pattern\E;) {
+			$line =~ s;^(# ok: )?(.*);# ok: $2;;
+			last;
+		}
+	}
+
+	# Print the possibly modified line to the temporary file
+	print $new $line;
+}
+
+# Close all file handles
+close $script; close $listfh; close $new;
+# Clobber the old version of the script
+system("mv", "$target.tmp", "$target");

File split-logs.pl

+#!/usr/bin/perl
+
+# This script splits up MoinMoin event logs into chunks, each one
+# spanning the given number of months
+
+# It's written in Perl to allow for date processing and much faster
+# scanning of the files. As a result, it uses several backticks;
+# maybe not the best idea, but simpler than the other options
+
+# I've tried to avoid messing with CPAN, and other log processing is
+# kept in the bash script to keep things simple
+
+# To allow for multiple log files, the script returns the cutoff UTC
+# of the preceding file
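+
+# Usage (as called from archive.cron and sweep-once.sh):
+#   perl split-logs.pl [-c] [cutoff-utc] <interval> <event-log>
+# e.g. chaining two logs:
+#   TMPUTC=$(perl split-logs.pl "3 months" event-log.1)
+#   perl split-logs.pl -c $TMPUTC "3 months" event-log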
+
+use warnings;
+no warnings 'uninitialized';
+use strict;
+
+# Pop common parameters from script arguments, declare script globals
+my $file = pop @ARGV;
+my $interval = pop @ARGV;
+my ($current, $lastline, $lastutc, $enddate, $stoputc);
+
+# If "current" flag is set, stop the last file from becoming a rump
+if ($ARGV[0] eq '-c') {
+	
+	# Drop the flag from the argument array
+	$current = shift @ARGV;
+
+	# Calculate the month of the last entry
+	$lastline = `tail -n 1 $file`;
+	$lastutc = substr $lastline, 0, 10;
+	$enddate = `date -d "\@$lastutc" +%Y-%m-01`;
+	
+	# Then guarantee between 1 & 2 full intervals remain in the log
+	$stoputc = `date -d "$enddate $interval ago" +%s`; 
+}
+
+# Should be either a given cutoff UTC or undefined (perl handles as 0)
+my $cututc = pop @ARGV;
+
+# Open the logfile to read from
+open BIGLOG, $file or die $!;
+
+# If a previous cutoff UTC was passed, open the proper file
+my ($tmpdate, $cutdate);
+if ($cututc != 0) {
+	$tmpdate = `date -d "\@$cututc" +%Y-%m-01`;
+	$cutdate = `date -d "$tmpdate $interval yesterday" +%Y%m%d`;
+
+	# Creates weird files with \n in the name otherwise
+	chomp($cutdate);
+	open OUTFILE, ">>", "wiki-log-$cutdate" or die $!;
+}
+
+# Start reading lines from the log, flag prevents redundant read/write
+my $laterchunk = 0;
+while (<BIGLOG>) {
+	
+	# Pull the date from the top line
+	my $toputc = substr $_, 0, 10;
+
+	# If a line is past the cutoff, switch to a new cutoff and file
+	if ($toputc >= $cututc) {
+
+		$tmpdate = `date -d "\@$toputc" +%Y-%m-01`;
+		$cutdate = `date -d "$tmpdate $interval yesterday" +%Y%m%d`;
+		chomp($cutdate);
+		$cututc = `date -d "$tmpdate $interval" +%s`;
+
+		# Break from the loop if the stop UTC will be passed
+		if (($current eq '-c') && ($cututc > $stoputc)) { last; }
+
+		# We know here the log will be split at least once, set flag
+		$laterchunk = 1;
+		# Open a file with the new name
+		open OUTFILE, ">>", "wiki-log-$cutdate" or die $!;
+	}
+
+	# Write the current line to the appropriate file
+	print OUTFILE $_;
+}
+
+# Perl preserves the file position on breaking from a loop
+# Move the line that triggered the break plus any leftover lines to a
+# new version of the original logfile
+if (($current eq '-c') && ($laterchunk)) {
+	open OUTFILE, ">", "$file.tmp" or die $!;
+	print OUTFILE $_ if defined $_;
+	while (<BIGLOG>) { print OUTFILE $_; }
+}
+
+# Close both files just in case ...
+close BIGLOG; close OUTFILE;
+# ... clobber the old log if necessary ...
+if (-e "$file.tmp") { system("mv", "$file.tmp", $file); }
+# ... and return $cututc for piping
+print $cututc;

File sweep-once.sh

+#!/bin/sh
+# This script will run a one-time clean of the WineWiki on Moin v1.5
+# Remember this script assumes being run from wiki's parent dir
+
+
+# *** Clean out the cache and pages ***
+# Use the `moin` command to clear out the cache; check the flags are correct
+# For v1.5.7, "config/" is in "data/", not so in later versions
+moin --config-dir=wiki/data/config --wiki-url=http://wiki.winehq.org/ \
+maint cleancache
+
+# Now use the cleanpage command to clear out the pages
+moin --config-dir=wiki/data/config --wiki-url=http://wiki.winehq.org/ \
+maint cleanpage > cleanpage.sh
+
+# There should probably be a date-based grace period though
+# First extract absolute paths from cleanpage and recent files
+sed "s;[^/]*\(/.*/\)[^/]*;\1;" cleanpage.sh | sort > paths.tmp
+find wiki/data/pages -mtime -365 | sed "s;\(.*\);$PWD/\1;" | sort \
+> recent.tmp
+# Then isolate the common files (comm expects sorted input)
+comm -12 paths.tmp recent.tmp > retain.tmp
+# And comment out those lines in cleanpage (using a perl script)
+perl comment-out.pl cleanpage.sh retain.tmp
+
+# The cleanpage script won't touch EditorBackups; perhaps it should
+# NOTE: Recent versions no longer make these pages, so they won't be
+#     an issue after the migration
+# Since the patterns contain '/', ':' & '#', use ';' for delimiters
+sed -i "s;\(# ok: \)\('.*MoinEditorBackup'\);mv \2 trash # trash;" \
+cleanpage.sh
+
+# After checking the script, run it, then wipe and remake the folders
+sh cleanpage.sh
+rm -rf wiki/data/trash
+rm -rf wiki/data/deleted
+rm cleanpage.sh
+rm *.tmp
+mkdir wiki/data/trash
+mkdir wiki/data/deleted
+
+# This will CHECK for any duplicate accounts
+# (use "--save" to write changes)
+moin --config-dir=wiki/data/config --wiki-url=http://wiki.winehq.org/ \
+account check --usersunique
+moin --config-dir=wiki/data/config --wiki-url=http://wiki.winehq.org/ \
+account check --emailsunique
+
+# But how to scan by dates? Use the mod times of the trail files,
+# which are still present in v1.5.7
+
+# After that, it looks like finding "last-visit" data won't be
+# guaranteed until at least after v2
+
+USERDIR="wiki/data/user"
+# This first step hunts down users with trails older than 1 year
+find $USERDIR -mtime +365 -iname "*.trail" | sed "s;\(.*\)\.trail;\1;" \
+> abandoned.tmp
+# Then delete each user's files (the glob catches .trail and .bookmark)
+while read -r user; do rm "$user"*; done < abandoned.tmp
+
+# Just to be sure, check for users without trails, and vice versa
+# (sorted first, since comm expects sorted input)
+find $USERDIR -type f \! -iname "*.trail" \! -iname "*.bookmark" \
+| sort > users.tmp
+find $USERDIR -iname "*.trail" | sed "s;\(.*\)\.trail;\1;" | sort \
+> trails.tmp
+comm -3 users.tmp trails.tmp > missing.tmp
+# ... and abandoned bookmarks
+find $USERDIR -iname "*.bookmark" | sed "s;\(.*\)\.bookmark;\1;" \
+| sort > marks.tmp
+comm -13 users.tmp marks.tmp >> missing.tmp
+
+# If everything looks good, clear out the cruft
+while read -r user; do rm "$user"*; done < missing.tmp
+rm *.tmp
+
+
+# *** Redo the logs (only needed at most once every migration) ***
+# First, merge the two edit-logs as they're actually quite compact
+# Clip off the unique ending of edit-log and store temporarily
+comm -13 edit-log.1 edit-log >> log-end.tmp
+# Append the unique portion of edit-log to edit-log.1
+cat log-end.tmp >> edit-log.1
+# and clobber the incomplete log and temporary file
+mv edit-log.1 edit-log
+rm log-end.tmp
+
+# Now to take care of the event-logs
+# Scan every line in the remaining logs using a tiny perl script
+TMPUTC=$(perl split-logs.pl "3 months" event-log.1)
+perl split-logs.pl -c $TMPUTC "3 months" event-log
+
+# Now event-log.1 should be safe to delete
+rm event-log.1

File tar-exclude

+# This is the exclusion list used to filter files from the tarball
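+# (referenced by archive.cron via "tar ... -X tar-exclude")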
+
+# No need for Python bytecode ...
+*.pyc
+
+# ... the cgi & wrapper scripts (part of install, may be obsolete) ...
+moin.cgi
+moinmod.py
+
+# ... the theme folder (moved to version-control elsewhere) ...
+winehq
+# ... and the underlay folder (part of install)
+underlay
+
+# There are also some files to be excluded in the data/ directory
+# The event-log should probably be backed up separately ...
+data/event-log*
+# ... as well as the user directory (keep it secret, keep it safe) ...
+data/user
+
+# ... ignore the plugin directory (part of install) ...
+data/plugin
+# ... and the cache directory (automatically rebuilt as needed)
+data/cache