andycroll/picturehouse_uk

View on GitHub
lib/picturehouse_uk/internal/title_sanitizer.rb

Summary

Maintainability
A
1 hr
Test Coverage
module PicturehouseUk
  # @api private
  module Internal
    # Sanitize and standardize film titles as listed by Picturehouse.
    #
    # This subclass only supplies the Picturehouse-specific pattern tables
    # (#remove and #replace); the sanitizing itself is inherited from
    # Cinebase::TitleSanitizer.
    # NOTE(review): the base class is not visible from this file -- confirm
    # there how and in which order the two tables are applied.
    class TitleSanitizer < Cinebase::TitleSanitizer
      # @!method initialize(title)
      #   Constructor
      #   @param [String] title a film title
      #   @return [PicturehouseUk::Internal::TitleSanitizer]

      # @!method sanitized
      #   sanitized and standardized title
      #   @return [String] sanitised title

      private

      # Patterns for the noise found in listing titles: certificates,
      # 2D/3D/IMAX format markers, event and strand labels, leftover HTML
      # entity fragments.
      #
      # Order is kept as-is on purpose: later entries may rely on earlier ones
      # having already been stripped (presumably applied in sequence by the
      # base class -- verify against Cinebase::TitleSanitizer).
      #
      # @return [Array<Regexp>] strings and regex to be removed
      def remove
        [
          /\s\[(AS LIVE:\s*)?[ACPGU1258]+\]/, # regular certificate
          /\s\[R18\]/,                        # R18
          /\s+\(?[23][dD]\)?/,                # 2d or 3d from title
          /\bIMAX\b/i,                        # imax from title
          /\s\[NO CERT\]/,                    # no certificate
          /\s\[TBC\]/,                        # tbc
          /\s\[N\/A\]/i,                      # n/a
          /\s\(Theatre\)/i,                   # (theatre)
          /\s\[\]/,                           # blank certificate
          /ourscreen\: /,                     # ourscreen
          /\s\(Re(\: \d{0,4})?\)/i,           # Re-release
          /\s\[CERT TBC\]/,                   # certificate TBC
          /\s?\-\s?autism.*ing\s?/i,          # autism screening
          /\s?\+\s?Q\&(amp;)*A\.?/i,          # +Q&A
          /KIDS CLUB\s*/i,                    # kids club
          /DISCOVER TUE(sday\:)*\s*/i,        # discover tue
          /FREE Screening\s*-\s*/i,           # free screening
          /\s*-?\s*Big Scream\s*-?\s*/i,      # big scream
          /\*?HOH Subtitled\*?/i,             # subtitled
          /\s*\-?\s*Reminiscence/i,           # reminiscence
          /\s*\-?\s*\(?Re\-issue\)?/i,        # reissue
          /\s*-?\s*Kids\'? Club\s*-?\s*/,     # kids club
          /\s*plus Q\&A.*/i,                  # extended Q&A
          /Cinemania\s*[\:\-]/i,              # cinemania
          /\@\s*komedia/i,                    # @ komedia
          /\s*\+ panel.*/i,                   # panel
          /toddler time\s*\:*\-*/i,           # toddler time
          /\(.*\d{4}\)/,                      # year or captured year
          /\bsingalong\b/i,                   # singalong
          /\s+\-?\s*Parents \& Babies/i,      # parents and babies
          /Subtitled\:*\s*/i,                 # subtitled
          # trailing "live" / "(live)" only as a whole word, so titles that
          # merely end in those letters (e.g. "Alive") are left intact
          /(?:\(|\b)live\)?\z/i,              # live
          /amp\;/i                            # leftover HTML entity fragment
        ]
      end

      # Event-cinema naming variants mapped to one canonical prefix each.
      #
      # Every pattern captures the actual production title in its single
      # group; the value is the standard prefix for that strand (presumably
      # prepended to the capture by the base class -- verify against
      # Cinebase::TitleSanitizer).
      #
      # @return [Hash{Regexp => String}] regexes and their replacements
      def replace
        {
          /Met\.? Encore:\s*(.*)/    => 'Met Opera: ',
          /Met\.? Opera:\s*(.*)/     => 'Met Opera: ',
          /National Theatre:\s*(.*)/ => 'NT Live: ',
          /NT Encore:\s*(.*)/        => 'NT Live: ',
          /NT Live:\s*(.*)/          => 'NT Live: ',
          /ROH\.? Live:\s*(.*)/      => 'Royal Opera House: ',
          /ROH\.? Encore:\s*(.*)/    => 'Royal Opera House: ',
          /RSC\.? Live:\s*(.*)/      => 'Royal Shakespeare Company: ',
          /RSC\.? Encore:\s*(.*)/    => 'Royal Shakespeare Company: ',
          /(.*) \(Bolshoi Ballet\)/  => 'Bolshoi Ballet: '
        }
      end
    end
  end
end