snowplow/sluice

View on GitHub
lib/sluice/storage/storage.rb

Summary

Maintainability
A
0 mins
Test Coverage
# Copyright (c) 2012-2014 Snowplow Analytics Ltd. All rights reserved.
#
# This program is licensed to you under the Apache License Version 2.0,
# and you may not use this file except in compliance with the Apache License Version 2.0.
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the Apache License Version 2.0 is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.

# Author::    Alex Dean (mailto:support@snowplowanalytics.com), Michael Tibben
# Copyright:: Copyright (c) 2012-2014 Snowplow Analytics Ltd
# License::   Apache License Version 2.0

module Sluice
  module Storage

    # Wraps a regex (held as a String) whose match is meant to be
    # negated by whatever consumes it, i.e. the pattern describes
    # the files to exclude rather than the files to keep.
    NegativeRegex = Struct.new(:regex)

    # Build a regex (as a String) matching files whose name contains
    # a date within the given range (inclusive), followed by at least
    # one non-slash character and the optional file extension.
    #
    # If +start_date+ is later than +end_date+ the range is empty and
    # the returned pattern matches nothing.
    #
    # Parameters:
    # +start_date+:: start date, as a String understood by Date.parse
    # +end_date+:: end date, as a String understood by Date.parse
    # +date_format+:: strftime format of the date in filenames
    # +file_ext+:: extension on files (if any), with or without the leading period
    def self.files_between(start_date, end_date, date_format, file_ext=nil)
      dates_regex(Date.parse(start_date), Date.parse(end_date), date_format, file_ext)
    end

    # Add a trailing slash to a path if missing.
    # Tolerates a nil path (returns nil).
    #
    # The argument is never modified: when a slash has to be added a
    # new String is returned, so frozen strings are accepted too.
    # An empty path yields '/'.
    #
    # Parameters:
    # +path+:: path to add a trailing slash to
    def self.trail_slash(path)
      return nil if path.nil?

      path.end_with?('/') ? path : "#{path}/"
    end

    # Find files up to (and including) the given date.
    #
    # Returns a regex in a NegativeRegex so that the
    # matcher can negate the match. The wrapped pattern is a
    # black list covering the day after +end_date+ up to today;
    # dates later than today are not part of the black list.
    #
    # If +end_date+ is today or later the black list is empty and
    # the wrapped pattern matches nothing, so no file is excluded.
    #
    # Parameters:
    # +end_date+:: end date, as a String understood by Date.parse
    # +date_format+:: strftime format of the date in filenames
    # +file_ext+:: extension on files (if any), with or without the leading period
    def self.files_up_to(end_date, date_format, file_ext=nil)
      day_after = Date.parse(end_date) + 1

      NegativeRegex.new(dates_regex(day_after, Date.today, date_format, file_ext))
    end

    # Find files starting from the given date.
    #
    # Returns a regex (as a String) acting as a white list covering
    # +start_date+ up to today. If +start_date+ is in the future the
    # white list is empty and the returned pattern matches nothing.
    #
    # Parameters:
    # +start_date+:: start date, as a String understood by Date.parse
    # +date_format+:: strftime format of the date in filenames
    # +file_ext+:: extension on files (if any), with or without the leading period
    def self.files_from(start_date, date_format, file_ext=nil)
      dates_regex(Date.parse(start_date), Date.today, date_format, file_ext)
    end

    # Make a file extension regular expression friendly,
    # adding a starting period (.) if missing and escaping every
    # regex metacharacter in it (so 'tar.gz' only matches a literal
    # '.tar.gz'). Returns nil for a nil or empty extension.
    #
    # Internal helper. A bare `private` has no effect on methods
    # defined with `def self.`, so this method has always been
    # callable from outside the module and is left that way for
    # backward compatibility.
    #
    # Parameters:
    # +file_ext+:: the file extension to make regexp friendly
    def self.regexify(file_ext)
      return nil if file_ext.nil? || file_ext.empty?

      Regexp.escape(file_ext.start_with?('.') ? file_ext : ".#{file_ext}")
    end

    # Build the filename pattern shared by the files_* methods:
    # an alternation of every day from +first_day+ to +last_day+
    # (inclusive) rendered with +date_format+, followed by one or
    # more non-slash characters and the optional extension.
    #
    # Parameters:
    # +first_day+:: first Date of the range
    # +last_day+:: last Date of the range
    # +date_format+:: strftime format of the date in filenames
    # +file_ext+:: extension on files (if any)
    def self.dates_regex(first_day, last_day, date_format, file_ext)
      dates = (first_day..last_day).map { |day| day.strftime(date_format) }

      # An empty alternation "()" matches the empty string, which would
      # turn the whole pattern into "match every file". Return a pattern
      # that can never match instead (empty negative lookahead).
      return '(?!)' if dates.empty?

      "(#{dates.join('|')})[^/]+#{regexify(file_ext)}$"
    end
    private_class_method :dates_regex

  end
end