kevinelliott/agent_orange

View on GitHub
lib/tasks/bot_agent_test.rake

Summary

Maintainability
Test Coverage
require "agent_orange"
require 'rexml/document'

desc "Test bots detection"
task :test_bots do
    success = 0
    fail = 0
    
    # tests for successful bot detection
    bot_user_agent_strings = [
        "ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)",
        "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
        ]
    browser_user_agent_strings = [
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_5; sv-se) AppleWebKit/525.26.2 (KHTML, like Gecko) Version/3.2 Safari/525.26.12",
      "Mozilla/5.0 (iPhone; U; Linux i686; pt-br) AppleWebKit/532+ (KHTML, like Gecko) Version/3.0 Mobile/1A538b Safari/419.3 Midori/0.2.0",
      "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_1 like Mac OS X; zh-tw) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8G4 Safari/6533.18.5",
      "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
      "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-us) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
      "Mozilla/5.0 (Windows; U; Windows NT 6.1; tr-TR) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27",
      "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
      "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 4.0; InfoPath.3; MS-RTC LM 8; .NET4.0C; .NET4.0E)",
      "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20110814 Firefox/6.0",
      "Mozilla/5.0 (X11; U; Linux i586; de; rv:5.0) Gecko/20100101 Firefox/5.0",
      "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
      "Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11"
        ]
    puts
    puts "parsing the user-agents.org list..."
    doc = REXML::Document.new(File.open("lib/tasks/allagents.xml"))
    doc.elements.each("user-agents/user-agent") do |ua|
        bot = false
        browser = false
        ua.elements.each("Type") do |item|
            if item.text == "R" || item.text == "S"
                bot = true
            elsif item.text == "B"
                browser = true
            end
            
        end
        if bot
            ua.elements.each("String") do |item|
                bot_user_agent_strings << item.text
            end
        end
        if browser
                        ua.elements.each("String") do |item|
                                browser_user_agent_strings << item.text
                        end
                end
    end
    puts "testing..."
    bot_user_agent_strings.each do |ua_str|
        ua = AgentOrange::UserAgent.new(ua_str)
        if ua.is_bot?
            #puts "SUCCESS:: " + ua_str
            success += 1
        else
            #puts "FAIL:: " + ua_str
            fail += 1
        end
    end

    puts "BOT DETECTION"
    puts success.to_s + " succeed. BENCHMARK: 376"
    puts fail.to_s + " fail. BENCHMARK: 1083"
    
    success = 0
    fail = 0 

        browser_user_agent_strings.each do |ua_str|
                ua = AgentOrange::UserAgent.new(ua_str)
                if !ua.is_bot?
                        #puts "SUCCESS:: " + ua_str
                        success += 1
                else
                        #puts "FAIL:: " + ua_str
                        fail += 1
                end
        end

    puts "FALSE POSITIVES CHECK"
    puts success.to_s + " succeed. BENCHMARK: 326"
    puts fail.to_s + " fail. BENCHMARK: 4"

end