# plugins/googlebot.nb
# -*-ruby; coding: utf-8 -*- vim:set ft=ruby:
#
# Copyright (c) 2004-2005 SASADA Koichi <ko1 at atdot.net>
# Copyright (c) 2009, 2010 Kazuhiro NISHIYAMA
#
# This program is free software with ABSOLUTELY NO WARRANTY.
# You can re-distribute and/or modify this program under
# the same terms of the Ruby's license.
#
#
# $Id$
#
=begin
== Usage with irc client
google> keyword
-> search keyword by google with default search language
google:[lang]> keyword
-> search keyword by google with [lang] language
googlec> k1 k2 k3 k4 k5(max 5 words)
-> search and show each hit count
googlec> k1 k2 k3 k4 k5(max 5 words)
-> search and show each hit count with default count language
googlec:[lang]> k1 k2 k3 k4 k5(max 5 words)
-> search and show each hit count with [lang] language
== Configuration:
BotConfig = [
{
:name => :GoogleBot,
:ch => /.*/,
:headers => {
#"User-Agent" => "Ruby/#{RUBY_VERSION}",
'Referer' => 'https://github.com/nadoka/nadoka',
},
# API key
:api_key => 'INSERT_YOUR_API_KEY',
# Custom search engine ID
:cx => '017576662512468239146:omuauf_lfve',
:googlec_maxwords => 5,
:search_default_lang => 'ja',
:count_default_lang => '',
:ch_kcode => :tojis,
},
]
=end
unless "".respond_to?(:encode)
require 'iconv'
end
require 'kconv'
require 'shellwords'
require 'cgi'
require 'open-uri'
begin
require 'json'
rescue LoadError
require 'rubygems'
require 'json'
end
if $0 == __FILE__
  # Stand-alone run (for testing): supply a minimal Nadoka::NDK_Bot so the
  # plugin class can be loaded without the rest of the IRC framework.
  module Nadoka
    class NDK_Bot
      # No-op stand-in; bot_initialize calls this helper.
      def bot_init_utils
      end

      # Builds a @bot_config that only carries a browser-like User-Agent
      # header, then runs the subclass hook bot_initialize.
      def initialize
        browser_headers = {
          "User-Agent" => "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) snap Chromium/80.0.3987.132 Chrome/80.0.3987.132 Safari/537.36",
        }
        @bot_config = { :headers => browser_headers }
        bot_initialize
      end
    end
  end
end
class GoogleBot < Nadoka::NDK_Bot
# Copies the plugin settings from @bot_config into instance variables,
# applying the built-in defaults for any key that is missing.
# A leading "lang_" on either default language is dropped.
def bot_initialize
  bot_init_utils

  config = @bot_config
  search_lang = config[:search_default_lang] || 'ja'
  count_lang = config[:count_default_lang] || ''

  @search_default_lang = search_lang.sub(/^lang_/, '')
  @googlec_maxwords = config[:googlec_maxwords] || 5
  @count_default_lang = count_lang.sub(/^lang_/, '')
  @headers = config.fetch(:headers, {})
  @api_key = config[:api_key]
  @cx = config[:cx]
  @uri_slog = config.fetch(:uri_slog, false)
  @ch_kcode = config.fetch(:ch_kcode, :tojis)
end
# PRIVMSG hook. Ignores messages from channels that @available_channel does
# not match and messages for which same_bot?(ch) is true. Otherwise the
# message is converted to UTF-8 with NKF, handed to dispatch_command, and any
# reply is sent back to the channel as a notice after being converted with
# the String method named by @ch_kcode.
def on_privmsg prefix, ch, msg
  return unless @available_channel === ch
  return if same_bot?(ch)

  response = dispatch_command(NKF.nkf('-w', msg))
  send_notice(ch, response.send(@ch_kcode)) if response
end
# Search back-end names accepted in commands, and a regexp alternation over
# them that dispatch_command interpolates into its command patterns.
SEARCHER = %w!web calc code local video blogs news books images ime imed patent suggest!.freeze
SEARCHER_RE = Regexp.new("(?:#{SEARCHER.join('|')})").freeze

# Returns the first SEARCHER entry that begins with +key+ (key is interpolated
# into a regexp as-is), or nil when none does.
def search_searcher key
  SEARCHER.find { |name| /\A#{key}/ =~ name }
end
# Parses one chat message and runs the matching command.
#
# Recognised forms:
#   g> words                          -> custom_search
#   goo..gle [searcher][:lang]> words -> search (extra "o"s become the start offset)
#   gu..guru[searcher][:lang]> words  -> same as above
#   googlec [searcher][:lang]> words  -> googlec (hit counts)
#   g<prefix>[:lang]> words           -> search, searcher chosen by prefix
#
# Returns the reply string, or nil when the message is not a command (or the
# searcher prefix is unknown). Any exception is logged through
# @manager.ndk_error and turned into an error reply instead of propagating.
def dispatch_command msg
  case msg
  when /^g>\s*(.+)/
    custom_search Regexp.last_match(1)
  when /^goo(o*)gle( #{SEARCHER_RE})?(:.*?)?>\s*(.+)/o, /^gu(u*)guru(#{SEARCHER_RE})?(:.+)?>\s*(.+)/o
    extra, engine, lang, words = Regexp.last_match.captures
    "goo#{extra}gle#{engine} bot#{lang}: #{search(extra.length, lang, words, engine)}"
  when /^googlec( #{SEARCHER_RE})?(:.*?)?>\s*(.+)/o
    engine, lang, words = Regexp.last_match.captures
    # googlec takes (lang, word, searcher). The language and the searcher
    # used to be passed in each other's place, so ":lang" was ignored and
    # the searcher name was used as the language.
    "googlec#{engine} bot#{lang}: #{googlec(lang, words, engine)}"
  when /^g(\w+)?(:.*?)?>\s*(.+)/
    prefix, lang, words = Regexp.last_match.captures
    searcher = prefix ? search_searcher(prefix) : 'web'
    "google #{searcher} bot#{lang}: #{search(0, lang, words, searcher)}" if searcher
  end
rescue Exception => e
  # Deliberately broad: every failure is logged and reported to the channel.
  @manager.ndk_error e
  "google bot: #{e.class} (#{e.message} @ #{e.backtrace[0]})"
end
# Queries the Google Custom Search JSON API for +word+ using @api_key / @cx.
#
# Returns "<title> - <link> (and N hit(s))" for the first item, "no match"
# when the response reports zero results or carries no items, or, on an HTTP
# error, the "reason" of the first error found in the JSON error body.
def custom_search word
  # Key and engine id are escaped like the query so that reserved characters
  # in them cannot corrupt the query string.
  uri = "https://www.googleapis.com/customsearch/v1" \
        "?key=#{CGI.escape(@api_key.to_s)}&cx=#{CGI.escape(@cx.to_s)}&q=#{CGI.escape(word)}"
  @logger.slog "GoogleBot: #{uri}" if @uri_slog

  # Kernel#open no longer fetches URLs on Ruby 3.0+; use URI.open where it
  # exists and fall back to Kernel#open on older interpreters.
  fetch = URI.respond_to?(:open) ? URI.method(:open) : method(:open)
  result = fetch.call(uri, @headers) { |f| JSON.parse(f.read) }
  @logger.slog "GoogleBot: #{result}" if @uri_slog

  total = (result["searchInformation"] || {})["totalResults"].to_i
  item = (result["items"] || []).first
  return "no match" if total == 0 || item.nil?

  # Decide singular/plural from the integer; the comma-formatted string
  # ("1,234") would parse back as 1.
  count = total.to_s.gsub(/(\d)(?=\d{3}+$)/, '\\1,')
  title = item["title"]
  url = item["link"]
  "#{title} - #{url} (and #{count} hit#{total > 1 ? 's' : ''})".delete("\r\n")
rescue OpenURI::HTTPError => e
  @logger.slog "GoogleBot: #{e.inspect}" if @uri_slog
  result = JSON.parse(e.io.read)
  @logger.slog "GoogleBot: #{result.inspect}" if @uri_slog
  result["error"]["errors"][0]["reason"].delete("\r\n")
end
# Runs one query against the ajax.googleapis.com search service.
#
# word     - query string (URL-escaped here)
# cnt      - start offset; only sent when its integer value is positive
# lang     - language code or nil; sent as hl=, and as lr=lang_<code> for 'web'
# searcher - service name appended to the endpoint path
#
# Returns the parsed JSON response with a singleton method
# estimatedTotalResultsCount added. A failing request is attempted up to
# five times; the last error is re-raised.
def do_search word, cnt, lang, searcher='web'
  # The URL does not depend on the attempt, so it is built once.
  uri = "http://ajax.googleapis.com/ajax/services/search/#{searcher}?v=1.0&q=#{CGI.escape(word)}"
  uri << "&key=#{CGI.escape(@api_key)}" if @api_key
  start = cnt.to_i
  uri << "&start=#{start}" if start > 0
  if lang
    uri << "&hl=#{CGI.escape(lang)}"
    uri << "&lr=lang_#{CGI.escape(lang)}" if searcher == 'web'
  end

  # Kernel#open no longer fetches URLs on Ruby 3.0+; use URI.open where it
  # exists and fall back to Kernel#open on older interpreters.
  fetch = URI.respond_to?(:open) ? URI.method(:open) : method(:open)

  attempts = 0
  begin
    @logger.slog "GoogleBot: #{uri}" if @uri_slog
    result = fetch.call(uri, @headers) { |f| JSON.parse(f.read) }
  rescue StandardError
    # Only ordinary errors are retried; Interrupt, SystemExit and the like
    # propagate immediately instead of being retried five times.
    attempts += 1
    retry if attempts < 5
    raise
  end

  def result.estimatedTotalResultsCount
    self["responseData"]["cursor"]["estimatedResultCount"]
  end
  result
end
# Runs do_search and formats the first hit as
# "<title> - <url> (and N hit(s))".
#
# Returns "error <status>: <details>" when the response has no responseData,
# and "no match" when the estimated result count is not positive or the
# result list is empty.
def api_search word, cnt, lang, searcher
  result = do_search word, cnt, lang, searcher
  data = result["responseData"]
  if data.nil?
    # {"responseData": null, "responseDetails": "qps rate exceeded", "responseStatus": 503}
    return "error #{result['responseStatus']}: #{result['responseDetails']}"
  end

  total = result.estimatedTotalResultsCount.to_i
  hit = (data["results"] || []).first
  return "no match" if total <= 0 || hit.nil?

  # Decide singular/plural from the integer; the comma-formatted string
  # ("1,234") would parse back as 1.
  count = total.to_s.gsub(/(\d)(?=\d{3}+$)/, '\\1,')
  url = hit['unescapedUrl'] || hit['url'] || hit['postUrl']
  title = show_char_code_and_erase_tag(hit['titleNoFormatting'])
  "#{title} - #{shorten_url(url)} (and #{count} hit#{total > 1 ? 's' : ''})"
end
# Sends +exp+ to the Google web search page and scrapes the calculator /
# conversion answer out of the returned HTML, trying several known page
# layouts in turn.
#
# Returns the extracted answer, "response error" when no layout matches, or
# the error message when the request or parsing raises.
def google_calc exp
  @logger.slog("google_calc<#{exp.dump}")
  uri = "https://www.google.co.jp/search?ie=UTF8&oe=UTF-8&q=#{CGI.escape(exp)}"
  # Kernel#open no longer fetches URLs on Ruby 3.0+; use URI.open where it
  # exists and fall back to Kernel#open on older interpreters.
  fetch = URI.respond_to?(:open) ? URI.method(:open) : method(:open)
  html = fetch.call(uri, @headers) { |f| f.read }
  # Under ruby -d keep a copy of the page for inspecting layout changes.
  File.open("g.html", "wb") { |f| f.write html } if $DEBUG
  if /class=r [^<>]+><b>(.+?)<\/b>/u =~ html
    result = $1
    # @logger.slog("google_calc>#{result.dump}")
    result.gsub!(/<sup>(.+?)<\/sup>/u) { "^(#{$1})" }
    result.gsub!(/<.+?>/u, '')
    result.gsub!(/&\#215;/u, "\303\227")
    return result
  elsif /<[^<>]+ id="cwos"[^<>]*>([^<>]+)</u =~ html
    result = $1
    # When the "cwles" element is present its text is put in front of the answer.
    if /<[^<>]+ id="cwles"[^<>]*>([^<>]+)</u =~ html
      result = "#{$1}#{result}"
    end
    #@logger.slog("google_calc>#{result.dump}")
    result.gsub!(/ /u, " ")
    result.gsub!(/\s+/, " ")
    return result
  elsif /<div class="leg_calc[^<>]*>(?:<div[^<>]*>)+([^<>]+)<\/div><div[^<>]*>([^<>]+)</u =~ html
    result = "#{$1} #{$2}"
    #@logger.slog("google_calc>#{result.dump}")
    return result
  elsif /<g-card>(.*)<\/g-card>/ =~ html
    result = $1
    result.sub!(/<a.*/u, '')
    result.gsub!(/<.+?>/u, '')
    return result
  elsif /<div class="RJn8N xXEKkb ellip[^"]*">= ([^<>]+)</ =~ html
    result = $1
    #@logger.slog("google_calc>#{result.dump}")
    return result
  else
    #IO.write('g.html', html) if STDOUT.tty?
    "response error"
  end
rescue StandardError => e
  # Report ordinary failures as the reply text; signals and exits propagate.
  e.to_s
end
# Asks the Google suggest endpoint (firefox output format) for completions of
# +word+, passing +lang+ as hl= when given.
# Returns the suggestions (second element of the JSON response) joined by ", ".
def google_suggest(word, lang)
  uri = "http://suggestqueries.google.com/complete/search?output=firefox&q=#{CGI.escape(word)}"
  uri << "&hl=#{CGI.escape(lang)}" if lang
  @logger.slog "GoogleBot: #{uri}" if @uri_slog
  # Kernel#open no longer fetches URLs on Ruby 3.0+; use URI.open where it
  # exists and fall back to Kernel#open on older interpreters.
  fetch = URI.respond_to?(:open) ? URI.method(:open) : method(:open)
  result = fetch.call(uri, @headers) { |f| JSON.parse(f.read) }
  result[1].join(", ")
end
# Builds (without fetching it) the code-search URL for +key+.
def google_code key
  escaped = CGI.escape(key)
  "http://google.com/codesearch#search/&q=#{escaped}&ct=os"
end
# encode_www_form(enum) turns key/value pairs into a query string.
# When URI.encode_www_form is available it is used directly; otherwise an
# equivalent built from URI.encode is defined.
if defined?(URI.encode_www_form)
  def encode_www_form(enum)
    URI.encode_www_form(enum)
  end
else
  def encode_www_form(enum)
    pairs = enum.map { |k, v| "#{URI.encode(k)}=#{URI.encode(v)}" }
    pairs.join('&')
  end
end
# Converts hiragana +text+ to kanji via the Google transliterate CGI API.
# see http://www.google.com/intl/ja/ime/cgiapi.html
#
# With d false (default) the first candidate of every segment is
# concatenated; with d true every segment is shown as "orig=cand1/cand2/..."
# separated by spaces. On an error the first line of its message is returned.
def google_ime text, d=false
  url = "http://www.google.com/transliterate?#{encode_www_form('langpair' => 'ja-Hira|ja', 'text' => text)}"
  # Kernel#open no longer fetches URLs on Ruby 3.0+; use URI.open where it
  # exists and fall back to Kernel#open on older interpreters.
  fetch = URI.respond_to?(:open) ? URI.method(:open) : method(:open)
  data = fetch.call(url, @headers) { |f|
    # TODO: gsub fix invalid JSON, should remove after fix response
    # see http://www.google.com/support/forum/p/ime/thread?tid=06501c8b7a16add3&hl=ja
    JSON.parse(f.read.gsub(/,(?=\n\])/,''))
  }
  result =
    if d
      data.map { |org, candidates| "#{org}=#{candidates.join('/')}" }.join(' ')
    else
      data.map { |_org, candidates| candidates[0] }.join('')
    end
  show_char_code_and_erase_tag(result)
rescue StandardError => e
  # Report ordinary failures as the reply text; signals and exits propagate.
  e.to_s[/.+/] # first line
end
# Normalises the language, searcher and query, then hands the query to the
# back-end that belongs to the searcher. Searchers without a dedicated
# back-end go through api_search.
def search cnt, lang, word, searcher=nil
  lang = lang_check(lang)
  engine = searcher_check(searcher)
  query = search_char_code(word)

  if engine == 'code'
    google_code query
  elsif engine == 'calc'
    google_calc query
  elsif engine == 'ime'
    google_ime query
  elsif engine == 'imed'
    google_ime query, true
  elsif engine == 'suggest'
    google_suggest query, lang
  else
    api_search query, cnt, lang, engine
  end
end
# Looks up the estimated hit count of every shell-style word in +word+ and
# returns them as '"w1"(n1), "w2"(n2), ...' with thousands separators.
# Returns 'too many options' when there are more words than
# @googlec_maxwords.
def googlec lang, word, searcher=nil
  lang = lang_check(lang, @count_default_lang)
  engine = searcher_check(searcher)

  quoted = Shellwords.shellwords(word).map { |token| "\"#{token}\"" }
  return 'too many options' if quoted.size > @googlec_maxwords

  counts = quoted.map do |phrase|
    query = search_char_code(phrase)
    hits = do_search("'#{query}'", 0, lang, engine).estimatedTotalResultsCount
    "#{phrase}(#{hits.to_s.gsub(/(\d)(?=\d{3}+$)/, '\\1,')})"
  end
  counts.join(', ')
end
# Removes <...> tags from +str+ and then decodes HTML entities.
def erase_tag str
  without_tags = str.gsub(/\<.+?\>/, '')
  CGI.unescapeHTML(without_tags)
end
# Normalises the optional ":lang" part of a command.
#
# lang    - nil when the command had no language part, otherwise the matched
#           text including its leading separator character (e.g. ":ja")
# default - language to use when lang is nil (defaults to @search_default_lang)
#
# Returns the language code without a "lang_" prefix, or nil when no language
# should be sent (a bare ":" or an empty/nil default).
def lang_check lang, default = @search_default_lang
  unless lang
    # Honour the caller's default; it used to be ignored in favour of
    # @search_default_lang. An empty default means "no language".
    return default.to_s.empty? ? nil : default
  end

  code = lang[1..-1].to_s
  return nil if code.empty?
  code.sub(/^lang_/, '')
end
# Returns the stripped searcher name when it is listed in SEARCHER, and 'web'
# for nil or for any other name.
def searcher_check searcher
  return 'web' unless searcher

  name = searcher.strip
  SEARCHER.include?(name) ? name : 'web'
end
# Converts +str+ to the interpreter's working encoding (UTF-8 when String
# has #encode, EUC-JP otherwise), strips tags and decodes HTML entities.
#
# The $KCODE/Iconv branch that used to follow the two unconditional returns
# could never run and has been removed. Entities are decoded once, by
# erase_tag; decoding a second time turned text such as "&amp;lt;" into "<".
def show_char_code_and_erase_tag str
  converted = str.respond_to?(:encode) ? str.toutf8 : str.toeuc
  erase_tag(converted)
end
# Returns +str+ in the encoding used for building queries. When String has
# #encode the string is simply converted to UTF-8; otherwise the conversion
# depends on $KCODE, and an unrecognised $KCODE raises.
def search_char_code str
  return str.toutf8 if str.respond_to?(:encode)

  kcode = $KCODE
  if kcode == 'EUC' || kcode == 'SJIS'
    str.toeuc
  elsif kcode == 'NONE'
    begin
      Iconv.conv("UTF-8", "EUC-JP", str.toeuc)
    rescue => e
      raise "(char code problem: #{e.class})"
    end
  elsif kcode == 'UTF-8'
    str
  else
    raise
  end
end
# Rewrites http://www.amazon.co.jp/... product URLs to the short
# http://amazon.jp/dp/... form; every other value is returned unchanged.
def shorten_url(url)
  amazon_product = %r!\Ahttp://www\.amazon\.co\.jp/.*(/dp/.+)\z!
  return "http://amazon.jp#{$1}" if amazon_product === url

  # default: do nothing
  url
end
end
# Ad hoc command-line driver: every argument is treated as a chat message,
# echoed, and answered through GoogleBot#dispatch_command. Log lines go to
# STDERR.
if $0 == __FILE__
  if ARGV.empty?
    puts "ad hoc test usage:",
         " ruby -vd plugins/googlebot.nb 'gc>1+1'",
         " ruby -vd plugins/googlebot.nb 'gc>1ドルを円で'"
  end
  require 'logger'

  google_bot = GoogleBot.new

  # Minimal logger that only provides the slog method the bot calls.
  stderr_logger = Object.new
  def stderr_logger.slog(log)
    STDERR.puts "slog>#{log}"
  end
  google_bot.instance_variable_set(:@logger, stderr_logger)

  ARGV.each do |message|
    puts message
    puts google_bot.dispatch_command(message)
  end
end