Source

zfs-tools / lib / snapshot_set.rb

Full commit

require 'active_support/core_ext/numeric/time'
require 'active_support/core_ext/integer/time'
require 'active_support/core_ext/date/calculations'
require 'time'
require 'set'

# A set of snapshots. This class provides methods for determining which
# snapshots must be deleted in order to free up space.
#
class SnapshotSet
  # Array of Snapshot objects, ordered by ascending timestamp.
  attr_reader :snapshots

  # Keeps metadata for each snapshot. Original name is stored as just +name+,
  # extracted timestamp is +time+.
  #
  class Snapshot
    # Matches 'YYYY-MM-DD-HHMMSS' anywhere inside a snapshot name.
    DASHED_WITH_SECONDS = /(\d+)-(\d+)-(\d+)-(\d{2})(\d{2})(\d{2})/

    # Matches 'YYYY-MM-DD_HH-MM' anywhere inside a snapshot name.
    DASHED_WITH_MINUTES = /(\d+)-(\d+)-(\d+)_(\d+)-(\d+)/

    attr_reader :name, :time

    # Creates a snapshot record for +name+. When no timestamp can be extracted
    # from the name, +time+ stays at +default_time+.
    #
    def initialize(name, default_time=Time.now)
      @time = default_time
      @name = name
      extract_timestamp(name)
    end

    # Tries to read a timestamp from +name+ and stores it in +time+. The two
    # dashed layouts are tried first; anything else is handed to Time.parse
    # unchanged. Returns the parsed time, or nil (leaving +time+ untouched)
    # when the name carries no time information.
    #
    def extract_timestamp(name)
      md = name.match(DASHED_WITH_SECONDS) || name.match(DASHED_WITH_MINUTES)

      # Only the digit groups are joined. Text following the timestamp is
      # deliberately left out, because Time.parse could misread it (a "-1"
      # suffix, for example, could be taken for a UTC offset).
      @time = Time.parse(md ? md.captures.join : name)
    rescue ArgumentError
      # no time information in "foobar"
      nil
    end

    def to_s
      name
    end
  end

  # Initialize this class with an unordered set of snapshot names. Names that
  # are of the form 'YYYYMMDDHHMM*' or 'YYYYMMDD*', or that contain
  # 'YYYY-MM-DD-HHMMSS' or 'YYYY-MM-DD_HH-MM', will be treated as timestamps
  # to which time based rules apply. All other names are stamped with the
  # time at which the set is built.
  #
  def initialize(snapshot_names)
    @snapshots = snapshot_names.
      map { |name| Snapshot.new(name) }.
      sort_by { |snapshot| snapshot.time }
  end

  # Returns the size of this set.
  #
  def size
    snapshots.size
  end

  # Returns the set as an array of snapshot names, ordered by timestamp.
  #
  def to_a
    snapshots.map(&:name)
  end

  # Computes snapshots to keep according to grandparent-parent-child
  # algorithm. If called with
  #
  #   set.gpc(1.day => 3, 1.week => 3)
  #
  # it will return a snapshot set that contains (ideally) 6 snapshots, 3 in
  # the current week starting one day ago, spaced out by one day. The other
  # three will be on week boundaries starting one week ago and going back 3
  # weeks.
  #
  # The algorithm will also return all the snapshots that are younger than
  # the smallest interval in the specification (one day in the example).
  #
  # +keep_specification+ maps an interval (seconds, or anything that can be
  # multiplied by an Integer and subtracted from a Time) to the number of
  # snapshots to keep for that interval. +now+ is the reference time the
  # sampling points are computed from.
  #
  # An empty specification defines no retention policy at all; in that case
  # every snapshot is kept rather than raising.
  #
  def gpc(keep_specification, now=Time.now)
    # No snapshots, nothing to keep
    return self.class.new([]) if snapshots.empty?

    # No rules means there is nothing to prune by; keep the whole set.
    return self.class.new(to_a) if keep_specification.empty?

    keep_snapshot_names = Set.new

    # Filter snapshots that we need to keep according to keep_specification
    keep_specification.each do |interval, keep_number|
      next if keep_number <= 0

      # We would like to sample the existing snapshots at regular offsets
      # (n * interval) and keep the snapshot closest to each sampling point.
      (1..keep_number).each do |offset|
        sampling_point = now - (offset * interval)
        winner = snapshots.min_by { |snapshot| (snapshot.time - sampling_point).abs }
        keep_snapshot_names << winner.name
      end
    end

    # Add snapshots that are within [now, smallest_interval]
    smallest_interval = keep_specification.map { |interval, _count| interval }.min
    recent_cutoff = now - smallest_interval
    snapshots.each do |snapshot|
      keep_snapshot_names << snapshot.name if snapshot.time > recent_cutoff
    end

    self.class.new(keep_snapshot_names.to_a)
  end
end