lib/etl/vvo_v2/suppliers_information_parser.rb
# -*- encoding : utf-8 -*-
module Etl
  module VvoV2
    # Extracts supplier records from the part of a notice document whose
    # heading matches /ODDIEL\s+V\W/. Only for notice_result.
    #
    # This is a mixin. It relies on the including object for `document`,
    # `document_format` and the helpers `next_element`,
    # `next_price_value_element`, `parse_date`, `parse_address`, `parse_zip`,
    # `parse_organisation_code`, `mostly_numbers?`, `strip_last_point`,
    # `parse_price1` and `parse_price2`; none of them is defined here.
    # `blank?` is also used and is not core Ruby (presumably ActiveSupport -
    # confirm against the host application).
    module SuppliersInformationParser
      # Item-code patterns keyed by item number N: the stripped code text must
      # contain "V" and end with a non-digit, the digit N and exactly one more
      # non-digit character (e.g. "V.1)").
      SUPPLIER_ITEM_CODES = {
        1 => /V\.*.*?[^\d]1[^\d]$/,
        2 => /V\.*.*?[^\d]2[^\d]$/,
        3 => /V\.*.*?[^\d]3[^\d]$/,
        4 => /V\.*.*?[^\d]4[^\d]$/,
        5 => /V\.*.*?[^\d]5[^\d]$/
      }.freeze

      # format1: label prefix of a contact span => key in the supplier hash.
      # Order matters, the first matching prefix wins.
      SUPPLIER_FORMAT1_CONTACTS = [
        ['Telefón', :supplier_phone],
        ['Fax', :supplier_fax],
        ['Email', :supplier_email],
        ['Mobil', :supplier_mobile]
      ].freeze

      # format2: pattern for the leading text of a value cell => key in the
      # supplier hash. Order matters, the first matching pattern wins.
      SUPPLIER_FORMAT2_CONTACTS = [
        [/Telefón/, :supplier_phone],
        [/Fax/, :supplier_fax],
        [/E-mail/, :supplier_email],
        [/Mobil/, :supplier_mobile]
      ].freeze

      # Suppliers information.
      #
      # Returns a hash `{:suppliers => [...]}` with one hash per supplier.
      # A record is kept only when it carries a :supplier_name; any other
      # `document_format` than :format1 / :format2 yields an empty list.
      def suppliers_information
        suppliers =
          case document_format
          when :format1 then suppliers_information_format1
          when :format2 then suppliers_information_format2
          else []
          end
        {:suppliers => suppliers}
      end

      private

      # Appends +supplier_hash+ to +suppliers+ when it has a :supplier_name and
      # returns a fresh hash for the next record. Records without a name are
      # dropped.
      def supplier_collect(suppliers, supplier_hash)
        suppliers << supplier_hash if supplier_hash[:supplier_name]
        {}
      end

      # True when +code_text+ matches the item-code pattern of any of +numbers+.
      def supplier_item_code?(code_text, *numbers)
        numbers.any? { |number| code_text.match(SUPPLIER_ITEM_CODES[number]) }
      end

      # Copies a final (awarded) price, either as a min/max range or as a
      # single value, into +supplier_hash+.
      def supplier_assign_final_price(supplier_hash, price_hash)
        supplier_hash[:procurement_currency] = price_hash[:currency]
        if price_hash[:price_range]
          supplier_hash[:final_price_min] = price_hash[:price_min]
          supplier_hash[:final_price_max] = price_hash[:price_max]
          supplier_hash[:final_price_range] = true
        else
          supplier_hash[:final_price] = price_hash[:price]
          supplier_hash[:final_price_range] = false
        end
        supplier_hash[:final_price_vat_included] = price_hash[:price_vat_included]
        supplier_hash[:final_price_vat_rate] = price_hash[:price_vat_rate]
      end

      # Copies a draft (estimated) price into +supplier_hash+.
      def supplier_assign_draft_price(supplier_hash, price_hash)
        supplier_hash[:procurement_currency] = price_hash[:currency]
        supplier_hash[:draft_price] = price_hash[:price]
        supplier_hash[:draft_price_vat_included] = price_hash[:price_vat_included]
        supplier_hash[:draft_price_vat_rate] = price_hash[:price_vat_rate]
      end

      # ---------------------------------------------------------------- format1

      # Walks every fieldset legend matching /ODDIEL\s+V\W/ and reads the coded
      # items of the element two siblings after it.
      def suppliers_information_format1
        suppliers = []
        document.xpath("//fieldset[@class='fieldset']/legend").each do |legend|
          next unless legend.inner_text.match(/ODDIEL\s+V\W/)

          section = legend.next_sibling.next_sibling
          supplier_hash = {}
          section.xpath(".//span[@class='code']").each do |code|
            supplier_hash = supplier_format1_item(suppliers, supplier_hash, code)
          end
          # add the last non empty record to the array
          supplier_collect(suppliers, supplier_hash)
        end
        suppliers
      end

      # Dispatches one coded item on its code and title text. Returns the hash
      # to keep filling: a fresh one when the item starts a new record,
      # otherwise the one passed in. Branch order is significant because the
      # conditions overlap.
      def supplier_format1_item(suppliers, supplier_hash, code)
        code_text = code.inner_text.strip
        item = code.parent
        header_text = item.xpath(".//span[@class='title']").inner_text

        if supplier_item_code?(code_text, 1) && header_text.match(/Dátum|DÁTUM/)
          supplier_hash = supplier_collect(suppliers, supplier_hash)
          supplier_format1_contract_name(supplier_hash, item)
          supplier_hash[:contract_date] = parse_date(next_element(item).inner_text)
        elsif supplier_item_code?(code_text, 1) && header_text.match(/VÝBER NAJVHODNEJŠIEHO/)
          supplier_hash = supplier_collect(suppliers, supplier_hash)
          supplier_format1_contract_name(supplier_hash, item)
        elsif supplier_item_code?(code_text, 2) && header_text.match(/Počet predložených ponúk|Počet uchádzačov, ktorí predložili ponuku/)
          supplier_hash[:offers_total_count] = next_element(item).inner_text.strip.to_i
        elsif supplier_item_code?(code_text, 1) && header_text.match(/Počet uchádzačov/)
          supplier_hash[:offers_total_count] = next_element(item).inner_text.strip.to_i
        elsif supplier_item_code?(code_text, 3) && header_text.match(/Počet vylúčených uchádzačov alebo záujemcov/)
          supplier_hash[:offers_excluded_count] = next_element(item).inner_text.strip.to_i
        elsif supplier_item_code?(code_text, 2) && header_text.match(/INFORMÁCIA O PONUKÁCH|Informácie o ponukách/)
          supplier_format1_offers(supplier_hash, item)
        elsif supplier_item_code?(code_text, 3, 4) && header_text.match(/Názov a adresa|NÁZOV A ADRESA|Meno(.*)adresa(.*)víťaza/)
          supplier_format1_details(supplier_hash, item)
        elsif supplier_item_code?(code_text, 4, 5) && header_text.match(/Informácie o hodnote|INFORMÁCIE O HODNOTE|HODNOTA/)
          supplier_format1_prices(supplier_hash, item)
        # Subcontracting
        elsif supplier_item_code?(code_text, 4, 5) && header_text.match(/predpoklad subdodávok|subdodávkach/)
          subcontracted = next_element(item).xpath(".//span").inner_text.strip
          supplier_hash[:procurement_subcontracted] = !subcontracted.match(/Nie/)
        end
        supplier_hash
      end

      # Sets :contract_name from the node two siblings before +item+ when that
      # node is a <div class="textArea">. A missing sibling or a div without a
      # class attribute is skipped instead of raising NoMethodError.
      def supplier_format1_contract_name(supplier_hash, item)
        sibling = item.previous_sibling
        candidate = sibling && sibling.previous_sibling
        return unless candidate && candidate.name == "div"

        class_attribute = candidate.attributes["class"]
        return unless class_attribute && class_attribute.value == "textArea"

        supplier_hash[:contract_name] = candidate.xpath(".//span[2]").inner_text.strip
      end

      # Reads the offer counters from the rows following +item+, stopping at
      # the first row that contains another item code.
      def supplier_format1_offers(supplier_hash, item)
        row = next_element(item)
        while row && row.xpath(".//span[@class='code']").empty?
          if row.inner_text.match(/Počet prijatých ponúk/)
            supplier_hash[:offers_total_count] = row.xpath(".//span").inner_text.to_i
          elsif row.inner_text.match(/Počet ponúk prijatých elektronickou cestou/)
            supplier_hash[:offers_online_count] = row.xpath(".//span").inner_text.to_i
          end
          row = next_element(row)
        end
      end

      # Reads name, organisation code, address, country and contact details
      # from the element following +item+.
      def supplier_format1_details(supplier_hash, item)
        details = next_element(item)
        supplier_hash[:supplier_name] = details.xpath("./span[@class='titleValue']/span[1]").inner_text.strip
        supplier_hash[:supplier_organisation_code] = details.xpath(".//span[@class='titleValue']/span[3]").inner_text.strip

        title_values = details.xpath(".//span[@class='titleValue']")
        # the full address is taken from the last child node of the title value(s)
        full_address = title_values.children.last.inner_text.strip
        address, zip, place = parse_address(full_address)
        supplier_hash[:supplier_address] = address
        supplier_hash[:supplier_zip] = zip
        supplier_hash[:supplier_place] = place

        # the country is the first non-blank element after the first title value
        country_element = next_element(title_values.first)
        while country_element && country_element.inner_text.strip.blank?
          country_element = next_element(country_element)
        end
        supplier_hash[:supplier_country] = country_element.inner_text.strip if country_element

        details.xpath("./span").each do |span|
          label = span.inner_text.strip
          _prefix, key = SUPPLIER_FORMAT1_CONTACTS.find { |prefix, _| label.start_with?(prefix) }
          # the value sits two siblings after its label span
          supplier_hash[key] = span.next_sibling.next_sibling.inner_text.strip if key
        end
      end

      # Reads the price information following +item+. A missing following
      # element is tolerated (nothing is read) instead of raising.
      def supplier_format1_prices(supplier_hash, item)
        price_element = next_element(item)
        if price_element && price_element.inner_text.strip.match(/Hodnota/)
          # single-row variant: the row itself carries the value and is never
          # treated as a range
          price_hash = parse_price1(price_element)
          supplier_hash[:procurement_currency] = price_hash[:currency]
          supplier_hash[:final_price] = price_hash[:price]
          supplier_hash[:final_price_vat_included] = price_hash[:price_vat_included]
          supplier_hash[:final_price_vat_rate] = price_hash[:price_vat_rate]
          supplier_hash[:final_price_range] = false
          return
        end

        # multi-row variant: scan rows until the next coded item
        while price_element && price_element.xpath(".//span[@class='code']").inner_text.blank?
          row_text = price_element.inner_text.strip
          if row_text.match(/konečná(.*)hodnota/)
            supplier_assign_final_price(supplier_hash, parse_price1(next_price_value_element(price_element)))
          elsif row_text.match(/predpokladaná(.*)hodnota/)
            supplier_assign_draft_price(supplier_hash, parse_price1(next_price_value_element(price_element)))
          end
          price_element = next_element(price_element)
        end
      end

      # ---------------------------------------------------------------- format2

      # Walks every main-table cell matching /ODDIEL\s+V\W/ and reads its rows.
      def suppliers_information_format2
        suppliers = []
        document.xpath("//table[@class='mainTable']//td[@class='cast']").each do |cell|
          next unless cell.inner_text.match(/ODDIEL\s+V\W/)

          supplier_hash = {}
          supplier_format2_rows(cell).each do |tr|
            supplier_hash = supplier_format2_row(suppliers, supplier_hash, tr)
          end
          # add the last non empty record to the array
          supplier_collect(suppliers, supplier_hash)
        end
        suppliers
      end

      # Locates the rows of the section, trying three table layouts in turn
      # and keeping the first lookup that finds anything.
      def supplier_format2_rows(cell)
        rows = cell.xpath(".//table[1]//tr[2]//td[2]//table[1]//tr")
        rows = cell.xpath(".//table[1]//tr[1]//td[1]//table[1]//tr") if rows.empty?
        rows = cell.xpath(".//table[1]//tr[1]//td[2]//table[1]//tr") if rows.empty?
        rows
      end

      # Text of the value span(s) ("hodnota") below +node+, not stripped.
      def supplier_format2_value(node)
        node.xpath(".//span[@class='hodnota']").inner_text
      end

      # Dispatches one table row. Returns the hash to keep filling: a fresh one
      # when the row starts a new record, otherwise the one passed in. Branch
      # order is significant because the conditions overlap.
      def supplier_format2_row(suppliers, supplier_hash, tr)
        code_text = tr.xpath(".//td[@class='kod']").inner_text.strip
        header_text = tr.xpath(".//td[2]//span[@class='nazov']").inner_text.strip
        subheader_text = tr.xpath(".//span[@class='podnazov']").inner_text

        if subheader_text.match(/(Z|z)mluva k časti/)
          supplier_hash = supplier_collect(suppliers, supplier_hash)
          name_row = next_element(tr)
          if name_row.xpath(".//span[@class='nazov']").inner_text.match(/Názov/) || name_row.xpath(".//span[@class='podnazov']").inner_text.match(/Názov/)
            supplier_hash[:contract_name] = strip_last_point(supplier_format2_value(name_row).strip)
          end
        elsif code_text.blank? && subheader_text.match(/Názov/)
          supplier_hash = supplier_collect(suppliers, supplier_hash)
          supplier_hash[:contract_name] = strip_last_point(supplier_format2_value(tr).strip)
        elsif supplier_item_code?(code_text, 1) && header_text.match(/Dátum|DÁTUM/)
          supplier_hash[:contract_date] = parse_date(supplier_format2_value(tr).strip)
        elsif supplier_item_code?(code_text, 2) && header_text.match(/Počet prijatých|POČET PRIJATÝCH/)
          supplier_hash[:offers_total_count] = supplier_format2_value(tr).to_i
        elsif supplier_item_code?(code_text, 1) && header_text.match(/Počet uchádzačov/)
          supplier_hash[:offers_total_count] = supplier_format2_value(tr).to_i
        elsif tr.inner_text.strip.match(/V(\.)(\d\.)?3(\D*)$/) && header_text.match(/Názov|NÁZOV|Meno(.*)adresa(.*)víťaza/)
          supplier_format2_details(supplier_hash, tr)
        # prices
        elsif tr.inner_text.strip.match(/V(\.)(\d\.)?4(\D*)$/) && header_text.match(/Informácie o hodnote|INFORMÁCIE O HODNOTE|HODNOTA OCENENÍ/)
          supplier_format2_prices(supplier_hash, tr)
        elsif supplier_item_code?(code_text, 5) && header_text.match(/PREDPOKLAD SUBDODÁVOK|predpoklad subdodávok|subdodávkach/)
          supplier_hash[:procurement_subcontracted] = !supplier_format2_value(tr).match(/Nie/)
        end
        supplier_hash
      end

      # Reads name, organisation code, address, country and contact details
      # from the value cells of the row following +tr+.
      def supplier_format2_details(supplier_hash, tr)
        cells = next_element(tr).xpath(".//table[1]//td[@class='hodnota']")
        supplier_hash[:supplier_name] = supplier_format2_value(cells[0])

        organisation_code = parse_organisation_code(supplier_format2_value(cells[1]))
        if organisation_code.blank? || mostly_numbers?(organisation_code)
          address_index = 2
        else
          # the second cell is not treated as an organisation code, so the
          # address is read from that cell instead
          organisation_code = ""
          address_index = 1
        end
        supplier_hash[:supplier_organisation_code] = organisation_code

        if cells[address_index].inner_text.match(/Poštová adresa/)
          # address, zip, place and country each have their own cell
          supplier_hash[:supplier_address] = supplier_format2_value(cells[address_index])
          supplier_hash[:supplier_zip] = parse_zip(supplier_format2_value(cells[address_index + 1]))
          supplier_hash[:supplier_place] = supplier_format2_value(cells[address_index + 2])
          supplier_hash[:supplier_country] = supplier_format2_value(cells[address_index + 3])
        else
          # one cell holds the full address, the next one the country
          address, zip, place = parse_address(supplier_format2_value(cells[address_index]))
          supplier_hash[:supplier_address] = address
          supplier_hash[:supplier_zip] = zip
          supplier_hash[:supplier_place] = place
          supplier_hash[:supplier_country] = supplier_format2_value(cells[address_index + 1])
        end

        cells.each do |td|
          header = td.children.first.inner_text.strip
          content = supplier_format2_value(td).strip
          _pattern, key = SUPPLIER_FORMAT2_CONTACTS.find { |pattern, _| header.match(pattern) }
          supplier_hash[key] = content if key
        end
      end

      # Reads final and draft prices from the rows following +tr+, stopping at
      # the first row that carries an item code.
      def supplier_format2_prices(supplier_hash, tr)
        price_element = next_element(tr)
        while price_element && price_element.xpath(".//td[@class='kod']").inner_text.blank?
          row_text = price_element.inner_text.strip
          awards = row_text.match(/Hodnota udelených ocenení/)
          if awards || row_text.match(/konečná(.*)hodnota/)
            # a row matching the awards pattern is itself passed to the price parser
            value_element = awards ? price_element : next_price_value_element(price_element)
            supplier_assign_final_price(supplier_hash, parse_price2(value_element)) if value_element
          elsif row_text.match(/predpokladaná(.*)hodnota/)
            value_element = next_price_value_element(price_element, [/konečná(.*)hodnota/])
            supplier_assign_draft_price(supplier_hash, parse_price2(value_element)) if value_element
          end
          price_element = next_element(price_element)
        end
      end
    end
  end
end