linguisticexplorer/Linguistic-Explorer

View on GitHub
lib/group_data/validator.rb

Summary

Maintainability
F
1 wk
Test Coverage
# GroupDataValidator
#
# ==> Category.csv <==
# id,name,depth,group_id,creator_id,description
#
# ==> Example.csv <==
# id,name,ling_id,group_id,creator_id
#
# ==> ExampleLingsProperty.csv <==
# id,example_id,lings_property_id,group_id,creator_id
#
# ==> Group.csv <==
# id, name, privacy, depth_maximum, ling0_name, ling1_name, property_name, category_name, lings_property_name, example_name, examples_lings_property_name, example_fields
#
# ==> Ling.csv <==
# id,name,parent_id,depth,group_id,creator_id
#
# ==> LingsProperty.csv <==
# id,ling_id,property_id,value,group_id,creator_id
#
# ==> Membership.csv <==
# id,member_id,group_id,level,creator_id
#
# ==> Property.csv <==
# id,name,description,category_id,group_id,creator_id
#
# ==> StoredValue.csv <==
# id, storable_id, storable_type, key, value, group_id
#
# ==> User.csv <==
# id,name,email,access_level,password
#

require 'csv'
require 'progressbar'

module GroupData
  class Validator

    attr_reader :check_all, :check_users, :check_groups, :check_memberships,
                :check_categories, :check_lings, :check_properties, :check_lings_properties,
                :check_examples_lp, :check_stored_values, :check_examples, :check_parents

    class << self
      # Factory entry point: constructs a validator for +config+.
      # +verbose+ (true unless specified) is passed straight to the constructor.
      def load(config, verbose=true)
        new(config, verbose)
      end
    end

    attr_accessor :config

    # For every name in +caches+ defines an instance method of that name which
    # returns the value of the same-named instance variable, storing (and
    # returning) an empty hash when that variable is nil or false.
    def self.lazy_init_cache(*caches)
      caches.each do |cache|
        ivar = "@#{cache}"
        define_method(cache.to_s) do
          # instance_variable_set returns the object it stored.
          instance_variable_get(ivar) || instance_variable_set(ivar, {})
        end
      end
    end

    #puts "Loading lazy_cache"
    lazy_init_cache :groups, :user_ids, :ling_ids, :category_ids, :property_ids, :example_ids, :lings_property_ids

    # accepts a hash (string or symbol keys) mapping each CSV kind
    # (:user, :group, ...) to the path of its CSV file, plus a verbosity flag
    def initialize(config, verbose)
      # Keys are symbolized in place, so the caller's object is modified too.
      config.symbolize_keys!
      @config    = config
      @verbose   = verbose
      @headers   = load_headers
      # Overall result; starts optimistic and is and-ed with each partial check.
      @check_all = true
    end

    # disabled progress_loading graphical interface for best performance
    # Validates every configured CSV in dependency order: users, groups,
    # memberships, lings (plus a second pass for parent links), categories,
    # properties, examples, lings properties, examples lings properties and
    # stored values.
    #
    # Each section checks the header, then walks the rows; ids of accepted
    # rows are cached so later sections can resolve their foreign keys.
    # A section stops at its first invalid row and only the first failing
    # check of that row is reported. Every partial result goes through
    # add_check_all, which terminates the process on failure.
    #
    # Returns the overall result (@check_all).
    def validate!
      # --- Users -----------------------------------------------------------
      @check_users = true
      add_check_all(validate_csv_header(:user, @check_users))
      @check_users = rows_valid?(:user, "Users...") do |row, line|
        ok = required_present?(:user, row, line)
        # cache user id
        user_ids[row["id"]] = true
        ok
      end
      add_check_all(@check_users)

      # --- Groups ----------------------------------------------------------
      @check_groups = true
      # Repair the common header typo *before* validating the header;
      # otherwise the typo fails validation and the repair never helps.
      fix_csv_elp_name
      add_check_all(validate_csv_header(:group, @check_groups))
      @check_groups = rows_valid?(:group, "Groups...") do |row, line|
        ok = required_present?(:group, row, line) &&
             lowercase_choice?(:group, row, line, "privacy", "Privacy", %w[public private])
        # cache group id
        groups[row["id"]] = true
        ok
      end
      add_check_all(@check_groups)

      # --- Memberships -----------------------------------------------------
      @check_memberships = true
      add_check_all(validate_csv_header(:membership, @check_memberships))
      @check_memberships = rows_valid?(:membership, "Memberships") do |row, line|
        required_present?(:membership, row, line, %w[creator_id]) &&
          foreign_key?(:membership, row, line, "group_id", groups) &&
          creator_known?(:membership, row, line) &&
          lowercase_choice?(:membership, row, line, "level", "Access Level", %w[admin member])
      end
      add_check_all(@check_memberships)

      # --- Lings -----------------------------------------------------------
      @check_lings = true
      add_check_all(validate_csv_header(:ling, @check_lings))
      @check_lings = rows_valid?(:ling, "Lings") do |row, line|
        ok = required_present?(:ling, row, line, %w[creator_id parent_id]) &&
             foreign_key?(:ling, row, line, "group_id", groups) &&
             creator_known?(:ling, row, line)
        # cache ling id together with its group, needed by the parent pass
        ling_ids[row["id"]] = row["group_id"]
        ok
      end
      add_check_all(@check_lings)

      # --- Ling parents (second pass: every ling id is known by now) -------
      @check_parents = rows_valid?(:ling, "Lings Associations") do |row, line|
        row["parent_id"].blank? || parent_in_same_group?(row, line)
      end
      add_check_all(@check_parents)

      # --- Categories ------------------------------------------------------
      @check_categories = true
      add_check_all(validate_csv_header(:category, @check_categories))
      @check_categories = rows_valid?(:category, "Category") do |row, line|
        ok = required_present?(:category, row, line, %w[creator_id description]) &&
             foreign_key?(:category, row, line, "group_id", groups) &&
             creator_known?(:category, row, line)
        # cache category id
        category_ids[row["id"]] = true
        ok
      end
      add_check_all(@check_categories)

      # --- Properties ------------------------------------------------------
      @check_properties = true
      add_check_all(validate_csv_header(:property, @check_properties))
      @check_properties = rows_valid?(:property, "Property") do |row, line|
        ok = required_present?(:property, row, line, %w[creator_id description]) &&
             foreign_key?(:property, row, line, "group_id", groups) &&
             foreign_key?(:property, row, line, "category_id", category_ids) &&
             creator_known?(:property, row, line)
        # cache property id
        property_ids[row["id"]] = true
        ok
      end
      add_check_all(@check_properties)

      # --- Examples --------------------------------------------------------
      @check_examples = true
      add_check_all(validate_csv_header(:example, @check_examples))
      @check_examples = rows_valid?(:example, "Examples") do |row, line|
        ok = required_present?(:example, row, line, %w[creator_id]) &&
             foreign_key?(:example, row, line, "group_id", groups) &&
             foreign_key?(:example, row, line, "ling_id", ling_ids) &&
             creator_known?(:example, row, line)
        # cache example id
        example_ids[row["id"]] = true
        ok
      end
      add_check_all(@check_examples)

      # --- Lings properties ------------------------------------------------
      @check_lings_properties = true
      add_check_all(validate_csv_header(:lings_property, @check_lings_properties))
      @check_lings_properties = rows_valid?(:lings_property, "Lings Properties") do |row, line|
        ok = required_present?(:lings_property, row, line, %w[creator_id]) &&
             foreign_key?(:lings_property, row, line, "group_id", groups) &&
             foreign_key?(:lings_property, row, line, "ling_id", ling_ids) &&
             # property ids were cached above; check the reference as well
             foreign_key?(:lings_property, row, line, "property_id", property_ids) &&
             creator_known?(:lings_property, row, line)
        # cache lings_property id
        lings_property_ids[row["id"]] = true
        ok
      end
      add_check_all(@check_lings_properties)

      # --- Examples lings properties ---------------------------------------
      @check_examples_lp = true
      add_check_all(validate_csv_header(:examples_lings_property, @check_examples_lp))
      @check_examples_lp = rows_valid?(:examples_lings_property, "Examples Lings Properties") do |row, line|
        required_present?(:examples_lings_property, row, line, %w[creator_id]) &&
          foreign_key?(:examples_lings_property, row, line, "group_id", groups) &&
          foreign_key?(:examples_lings_property, row, line, "lings_property_id", lings_property_ids) &&
          foreign_key?(:examples_lings_property, row, line, "example_id", example_ids) &&
          creator_known?(:examples_lings_property, row, line)
      end
      add_check_all(@check_examples_lp)

      # --- Stored values ---------------------------------------------------
      @check_stored_values = true
      add_check_all(validate_csv_header(:stored_value, @check_stored_values))
      @check_stored_values = rows_valid?(:stored_value, "Stored Values") do |row, line|
        required_present?(:stored_value, row, line) &&
          foreign_key?(:stored_value, row, line, "group_id", groups) &&
          # storable_id is resolved against the example ids only
          foreign_key?(:stored_value, row, line, "storable_id", example_ids)
      end
      add_check_all(@check_stored_values)

      @check_all
    end

    # Walks the rows of the CSV registered under +key+, yielding each row with
    # its 1-based data-row number. Stops at the first row for which the block
    # returns a falsy value. Shows a progress bar labelled +title+ when
    # verbose. Returns true when every visited row was accepted.
    def rows_valid?(key, title)
      bar = ProgressBar.new(title, csv_size(key)) if @verbose
      line = reset_line
      valid = true
      csv_for_each(key) do |row|
        valid = yield(row, line) ? true : false
        bar.inc if @verbose
        # Incremented for every row, including accepted ones, so reported
        # positions always match the file.
        line += 1
        break unless valid
      end
      bar.finish if @verbose
      valid
    end

    # True when every column of +row+ not listed in +optional+ has a value;
    # otherwise reports a missing-parameter error for +line+.
    def required_present?(key, row, line, optional = [])
      complete = true
      row.each do |col, value|
        complete &&= optional.include?(col) || value.present?
      end
      print_error :err_missing, key, line unless complete
      complete
    end

    # True when +cache+ has a truthy entry for row[column]; otherwise reports
    # a foreign-key error naming +column+.
    def foreign_key?(key, row, line, column, cache)
      known = cache[row[column]] ? true : false
      print_error :err_foreign, key, line, column unless known
      known
    end

    # The creator is optional; when given it must be a cached user id.
    def creator_known?(key, row, line)
      row["creator_id"].blank? || foreign_key?(key, row, line, "creator_id", user_ids)
    end

    # True when row[column] is one of +allowed+ and already lowercase.
    # Reports a validity error for an unknown value and a lowercase error for
    # a known value written with capitals. A missing value is treated as an
    # empty string instead of raising.
    def lowercase_choice?(key, row, line, column, label, allowed)
      value = row[column].to_s
      unless allowed.include?(value.downcase)
        print_error :err_validity, key, line, column, label, row[column]
        return false
      end
      return true if value == value.downcase

      print_error :err_lowercase, key, line, column, label, row[column]
      false
    end

    # True when the ling referenced by row["parent_id"] was cached and was
    # cached with the same group id as this row.
    def parent_in_same_group?(row, line)
      parent_group = ling_ids[row["parent_id"]]
      unless parent_group.present?
        print_error :err_foreign, :ling, line, "parent_id"
        return false
      end
      return true if parent_group == row["group_id"]

      print_error :err_foreign, :ling, line, "group_id"
      print_to_console "\n=> Should be '#{parent_group}' instead of '#{row["group_id"]}'"
      false
    end

    private :rows_valid?, :required_present?, :foreign_key?, :creator_known?,
            :lowercase_choice?, :parent_in_same_group?

    private

    # Initial value of the per-file row counter.
    def reset_line
      1
    end

    # Yields every data row of the CSV whose path is stored under +key+ in
    # the configuration; the first line of the file is read as the header.
    def csv_for_each(key)
      path = @config[key]
      CSV.foreach(path, :headers => true) { |record| yield(record) }
    end

    # Number of data rows (header excluded) in the CSV stored under +key+.
    #
    # Rows are counted by streaming the file with the same options used by
    # csv_for_each, so the figure matches the number of rows that method
    # yields and the file is never held in memory as a whole. An empty file
    # counts as 0 rows rather than -1.
    def csv_size(key)
      rows = 0
      CSV.foreach(@config[key], :headers => true) { rows += 1 }
      rows
    end

    # Repairs a common typo in the header of the group CSV: a column whose
    # name starts with "example_lings_propert" is renamed to
    # "examples_lings_property_name".
    #
    # Only the first line of the file is touched; data rows are never
    # modified. The match stops at the next comma or line end, so
    # neighbouring columns are preserved and the typo is also repaired when
    # it is the last column. The file is rewritten only when the header
    # actually changed.
    def fix_csv_elp_name
      path = @config[:group]
      lines = File.readlines(path)
      return if lines.empty?

      header = lines.first
      repaired = header.gsub(/example_lings_propert[^,\r\n]*/, "examples_lings_property_name")
      return if repaired == header

      lines[0] = repaired
      File.open(path, "w") { |out| out.write(lines.join) }
    end

    # Checks that the header line of the CSV stored under +key+ contains
    # every column expected for that kind of file (@headers[key]).
    #
    # Only the first line of the file is inspected. Column names are compared
    # exactly after trimming surrounding whitespace, so a name that merely
    # occurs inside another column name or inside a data cell no longer
    # satisfies the check. "creator_id" is never required.
    #
    # key::   configuration key of the file (:user, :group, ...)
    # check:: incoming state; the result is and-ed onto it
    #
    # Reports the first missing column through print_header_error and
    # returns the updated +check+.
    def validate_csv_header(key, check)
      first_line = File.open(@config[key]) { |f| f.gets }
      # An empty file has no first line; treat it as having no columns.
      columns = (CSV.parse_line(first_line.to_s) || []).map { |name| name.to_s.strip }

      @headers[key].each do |title|
        check &= columns.include?(title) unless title == "creator_id"
        next if check

        print_header_error key, title
        break
      end

      check
    end

    # Prints the message for one validation failure, followed by a newline.
    #
    # type:: :err_foreign, :err_missing, :err_header, :err_validity or
    #        :err_lowercase; any other value prints only the newline
    # key::  configuration key of the file; shown camelized with ".csv"
    # line:: row counter; messages show line + 1
    # args:: optional column name, human-readable field name and bad value
    def print_error(type, key, line, *args)
      col, name, value = args

      message =
        case type
        when :err_foreign
          "\n#{red("ERROR")} - Foreign Key check fails in #{key.to_s.camelize}.csv - [#{col.capitalize}] line #{line+1}"
        when :err_missing
          "\n#{red("ERROR")} - Missing parameter in #{key.to_s.camelize}.csv - line #{line+1}"
        when :err_header
          "\n#{red("ERROR")} - Header Validation fails for #{key.to_s.camelize}.csv\n=> Please check for '#{col}' column"
        when :err_validity
          "\n#{red("ERROR")} - #{name} value should be valid in #{key.to_s.camelize}.csv - line #{line+1}\n => '#{value}' not valid"
        when :err_lowercase
          "\n#{red("ERROR")} - #{name} should be lowercase in #{key.to_s.camelize}.csv - line #{line+1}"
        end

      print_to_console message if message
      print_to_console "\n"
    end

    # Reports a header-validation failure for column +title+ of file +key+.
    # Delegates to print_error with a line number of 0.
    def print_header_error(key, title)
      print_error(:err_header, key, 0, title)
    end

    # Wraps +string+ in the ANSI escape sequences "\e[31m" and "\e[0m".
    def red(string)
      colour_on  = "\e[31m"
      colour_off = "\e[0m"
      "#{colour_on}#{string}#{colour_off}"
    end

    # And-s +check_partial+ onto the overall result. When the overall result
    # is no longer truthy, prints a newline and terminates the process with
    # exit status 1.
    def add_check_all(check_partial)
      @check_all &= check_partial
      return if @check_all

      print_to_console("\n")
      exit(1)
    end

    # Returns a fresh hash mapping each CSV kind to the list of column names
    # looked for in that file's header.
    def load_headers
      {
        :user                    => %w[name id email access_level password],
        :group                   => %w[id name privacy depth_maximum ling0_name ling1_name
                                       property_name category_name lings_property_name
                                       example_name examples_lings_property_name example_fields],
        :membership              => %w[id member_id group_id level creator_id],
        :ling                    => %w[id name parent_id depth group_id creator_id],
        :category                => %w[id name depth group_id creator_id description],
        :property                => %w[id name description category_id group_id creator_id],
        :example                 => %w[id ling_id group_id creator_id name],
        :lings_property          => %w[id ling_id property_id value group_id creator_id],
        :examples_lings_property => %w[id example_id lings_property_id group_id creator_id],
        :stored_value            => %w[id storable_id storable_type key value group_id]
      }
    end

    # Writes +string+ to standard output, but only in verbose mode.
    def print_to_console(string)
      return unless @verbose

      print string
    end

  end

end