amatriain/feedbunch

View on GitHub
FeedBunch-app/lib/opml_importer.rb

Summary

Maintainability
A
2 hrs
Test Coverage
# frozen_string_literal: true

require 'zip'
require 'zip/filesystem'
require 'nokogiri'

##
# This class manages import of subscription data from another feed aggregator into Feedbunch

class OpmlImporter

  # Name of the folder, passed to the uploads manager, in which uploaded OPML import files are saved.
  FOLDER = 'opml_imports'

  # Patterns used to locate the data file inside a zip archive, in priority order.
  # Dots are escaped so that only a literal "." before the extension matches.
  ZIP_SEARCH_PATTERNS = [
    /subscriptions\.xml\z/,
    /\.opml\z/,
    /\.OPML\z/,
    /\.xml\z/,
    /\.XML\z/
  ].freeze
  private_constant :ZIP_SEARCH_PATTERNS

  ##
  # Extract subscriptions data from an uploaded file, save it as an (unzipped) OPML file through the
  # configured uploads manager, and enqueue a background job to import those subscriptions
  # in the user's account.
  #
  # Receives as arguments the file uploaded by the user and the user that requested the import.
  #
  # Optionally the file can be a zip archive; this is the format one gets when exporting from Google.
  #
  # Before reading the file, any existing OPML import job state of the user is destroyed and a new one
  # is created in RUNNING state.
  #
  # If any error is raised during importing, the import job state is recreated in ERROR state and this
  # method raises an OpmlImportError, to ensure that the user is always redirected to the start page,
  # instead of being left at a blank HTTP 500 page.
  #
  # Returns nil.

  def self.enqueue_import_job(file, user)
    Rails.logger.info "User #{user.id} - #{user.email} requested import of a data file"
    # Destroy the current import job state for the user. This in turn triggers a deletion of any associated import failure data.
    user.opml_import_job_state&.destroy
    user.create_opml_import_job_state state: OpmlImportJobState::RUNNING

    subscription_data = read_data_file file
    filename = "feedbunch_import_#{Time.zone.now.to_i}.opml"
    Feedbunch::Application.config.uploads_manager.save user.id, FOLDER, filename, subscription_data

    Rails.logger.info "Enqueuing Import Subscriptions Job for user #{user.id} - #{user.email}, OPML file #{filename}"
    ImportOpmlWorker.perform_async filename, user.id
    nil
  rescue => e
    Rails.logger.error "Error trying to read OPML data from file uploaded by user #{user.id} - #{user.email}"
    Rails.logger.error e.message
    Rails.logger.error e.backtrace
    # Replace whatever job state exists with one in ERROR state, so the failure is recorded for the user.
    user.opml_import_job_state&.destroy
    user.create_opml_import_job_state state: OpmlImportJobState::ERROR
    raise OpmlImportError
  end

  ##
  # Process an OPML file with subscriptions for a user.
  #
  # Receives as arguments:
  # - the name of the file, as it was saved through the uploads manager in the FOLDER folder
  # - the user who is importing the file
  #
  # The file is retrieved using the uploads_manager configured in Feedbunch::Application.config.
  #
  # Feeds are read from two places in the OPML document: <outline type="rss"> nodes directly under
  # <body> (feeds without folder), and <outline type="rss"> nodes one level below a non-rss <outline>
  # (feeds in a folder). Folders are created for the user if necessary. The collected feed URLs and
  # folder ids are then handed to ImportSubscriptionsWorker.
  #
  # NOTE(review): this method does not delete the file itself; if deletion is expected, confirm
  # it happens elsewhere.
  #
  # Raises an OpmlImportError if the file cannot be read or contains no feeds, and a
  # Nokogiri::XML::SyntaxError if it is not well-formed XML.
  #
  # Returns nil.

  def self.process_opml(filename, user)
    # Read the file and check if it actually exists
    xml_contents = Feedbunch::Application.config.uploads_manager.read user.id, FOLDER, filename
    if xml_contents.nil?
      Rails.logger.error "Trying to import for user #{user.id} from non-existing OPML file: #{filename}"
      raise OpmlImportError
    end

    # Parse OPML file (it's actually XML). Strict mode makes Nokogiri raise on malformed XML.
    begin
      doc_xml = Nokogiri::XML(xml_contents) { |config| config.strict }
    rescue Nokogiri::XML::SyntaxError
      Rails.logger.error "Trying to parse malformed XML file #{filename}"
      raise
    end

    # Check that the file was actually an OPML file with feeds
    total_feeds = count_total_feeds doc_xml
    if total_feeds.zero?
      Rails.logger.error "Trying to import for user #{user.id} from OPML file: #{filename} but file contains no feeds"
      raise OpmlImportError
    end
    # Update total number of feeds, so user can see progress.
    user.opml_import_job_state.update total_feeds: total_feeds

    # Parallel arrays that will be passed to ImportSubscriptionsWorker:
    # folder_ids[i] is the folder id for urls[i], or nil for a feed that is not in a folder.
    urls = []
    folder_ids = []

    # Process feeds that are not in a folder
    doc_xml.xpath('/opml/body/outline[@type="rss" and @xmlUrl]').each do |feed_node|
      folder_ids << nil
      urls << feed_node['xmlUrl']
    end

    # Process feeds in folders
    doc_xml.xpath('/opml/body/outline[not(@type="rss")]').each do |folder_node|
      feed_nodes = folder_node.xpath('./outline[@type="rss" and @xmlUrl]')
      # Ignore <outline> nodes which contain no feeds
      next if feed_nodes.empty?

      folder_title = folder_node['title'] || folder_node['text']
      folder = import_folder folder_title, user
      feed_nodes.each do |feed_node|
        folder_ids << folder.id
        urls << feed_node['xmlUrl']
      end
    end

    # Enqueue the worker that imports the individual feeds
    ImportSubscriptionsWorker.perform_async user.opml_import_job_state.id, urls, folder_ids

    nil
  end

  #############################
  # PRIVATE CLASS METHODS
  #############################

  ##
  # Read a data file and return its contents. Accepts as argument a file, which can be:
  # - an unzipped data file
  # - a zip archive containing a data file. In this case the data file inside the zip
  # will be read and returned.
  #
  # When searching inside a zip archive for a data file, searches will be performed
  # in this order:
  # - a subscriptions.xml file
  # - any file with .opml extension
  # - any file with .OPML extension
  # - any file with .xml extension
  # - any file with .XML extension
  #
  # The first matching file found will be read and returned. Files will be found even
  # if they are inside a folder (or several levels of folders).
  #
  # If no matching file is found inside the zip, an OpmlImportError will be raised.
  #
  # If opening or reading the archive raises a Zip::Error, the file is assumed not to be a zip archive
  # and its raw contents are returned instead.

  def self.read_data_file(file)
    file_contents = nil

    begin
      # Block form guarantees the archive is closed even if searching or reading raises.
      Zip::File.open(file) do |zip_file|
        ZIP_SEARCH_PATTERNS.each do |pattern|
          file_contents = search_zip zip_file, pattern
          break unless file_contents.blank?
        end
      end

      if file_contents.blank?
        Rails.logger.warn 'Could not find OPML file in uploaded data file'
        raise OpmlImportError
      end
    rescue Zip::Error
      # file is not a zip, read it normally
      Rails.logger.info 'Uploaded file is not a zip archive, it is probably an uncompressed OPML file'
      file_contents = File.read file
    end

    file_contents
  end
  private_class_method :read_data_file

  ##
  # Search among the files in a zip archive for a file whose name (including extension)
  # matches the pattern passed as argument.
  #
  # Receives as arguments the opened zip file and the search pattern.
  #
  # The search is case-sensitive.
  #
  # Returns the contents of the first matching file found (tagged as UTF-8),
  # or nil if there were no matches.

  def self.search_zip(zip_file, pattern)
    file_contents = nil
    zip_file.each do |entry|
      next unless entry.name.match? pattern

      Rails.logger.debug "Found OPML file #{entry.name} in uploaded zip archive"
      file_contents = zip_file.file.read entry.name
      # Only relabels the bytes as UTF-8; the bytes themselves are not transcoded.
      file_contents.force_encoding 'utf-8'
      break
    end

    file_contents
  end
  private_class_method :search_zip

  ##
  # Count the number of feeds in an OPML file.
  #
  # Receives as argument an OPML document parsed by Nokogiri.
  #
  # Counts feeds directly under <body> plus feeds one level inside a non-rss <outline> (a folder);
  # these are the same nodes that process_opml imports.
  #
  # Returns the number of feeds in the document, as an Integer.

  def self.count_total_feeds(docXml)
    feeds_not_in_folders = docXml.xpath 'count(/opml/body/outline[@type="rss" and @xmlUrl])'
    feeds_in_folders = docXml.xpath 'count(/opml/body/outline[not(@type="rss")]/outline[@type="rss" and @xmlUrl])'
    # XPath count() results are expected to come back as Floats; convert so callers get an Integer.
    (feeds_not_in_folders + feeds_in_folders).to_i
  end
  private_class_method :count_total_feeds

  ##
  # Import a folder, creating it if necessary. The folder will be owned by the passed user.
  # If the user already has a folder with the same title, no action will be taken.
  #
  # Receives as arguments the title of the folder and the user who requested the import.
  #
  # Returns the folder. It may be a newly created folder, if the user didn't have a folder with the same title,
  # or it may be an already existing folder if they did.

  def self.import_folder(title, user)
    folder = user.folders.find_by title: title

    if folder.blank?
      Rails.logger.info "User #{user.id} - #{user.email} imported new folder #{title}, creating it"
      folder = user.folders.create title: title
    else
      Rails.logger.info "User #{user.id} - #{user.email} imported already existing folder #{title}, reusing it"
    end

    folder
  end
  private_class_method :import_folder
end