bin/transform_CSV_to_HR_XML
#!/usr/bin/env ruby
require 'rubygems'
require 'bundler/setup'
require 'builder'
require 'csv'
require 'optparse'
require 'rest_client'
require 'time'
require 'kuali_toolbox/etl/grm'
ETL = KualiCo::ETL
GRM = KualiCo::ETL::GRM
TextParseError = KualiCo::ETL::TextParseError
def self.parse_command_line_options(
executable, args, opt={})
csv_options = {
headers: :first_row,
header_converters: :symbol,
skip_blanks: true,
col_sep: ",", # comma by default
quote_char: '"', # double quote by default
}
optparse = OptionParser.new do |opts|
opts.banner = "Usage: #{executable} [options] csv_file"
opts.on( '-o [xml_file_output]' ,'--output [xml_file_output]', 'The file in which the the XML data will be writen (defaults to <csv_file>.xml)') do |f|
opt[:xml_filename] = f
end
opts.on( '-s [separator_character]' ,'--separator [separator_character]', 'The character that separates each column of the CSV file.' ) do |s|
csv_options[:col_sep] = s
end
opts.on( '-q [quote_character]' ,'--quote [quote_character]', 'The character used to quote fields.' ) do |q|
csv_options[:quote_char] = q
end
opts.on( '-e [email_recipients]', '--email [email_recipients]', 'Email recipient list that will receive job report status.' ) do |e|
opt[:email_recipients] = e
end
opts.on( '-u [username]', '--username [username]', 'The username used to authenticate to the HR REST API.' ) do |u|
opt[:username] = u
end
opts.on( '-p [password]', '--password [password]', 'The password used to authenticate to the HR REST API.' ) do |p|
opt[:password] = p
end
opts.on( '-l [url]', '--url [url]', 'The full URL of the HR REST API; e.g. https://localhost/kc-dev/hr-import/hrimport/import' ) do |l|
opt[:url] = l
end
opts.on( '-c', '--continue', 'Continue writing the output file if errors are present' ) do
opt[:continue] = true
end
opts.on( '-h', '--help', 'Display this screen' ) do
puts opts
exit 1
end
opt[:csv_options] = csv_options
opt[:csv_filename] = args[0] unless opt[:csv_filename]
if opt[:csv_filename].nil? || opt[:csv_filename].empty?
puts opts
exit 1
end
end
optparse.parse!
# construct a sensible default ouptput filename
unless opt[:xml_filename]
file_extension = File.extname opt[:csv_filename]
dir_name = File.dirname opt[:csv_filename]
base_name = File.basename opt[:csv_filename], file_extension
opt[:xml_filename] = "#{dir_name}/#{base_name}.xml"
end
unless opt[:email_recipients]
opt[:email_recipients] = "no-reply@kuali.co"
end
if opt[:url]
unless opt[:username] && opt[:password]
raise ArgumentError, "Username and password are required when POSTing to a URL!"
end
end
return opt
end
def buildName(row)
nm = {}
nm_typ_cd = GRM.parse_name_code row[:nm_typ_cd]
prefix_nm = GRM.parse_prefix row[:prefix_nm]
first_nm = ETL.parse_string row[:first_nm], length: 40, name: 'FIRST_NM'
middle_nm = ETL.parse_string row[:middle_nm], length: 40, name: 'MIDDLE_NM'
last_nm = ETL.parse_string row[:last_nm], length: 80, name: 'LAST_NM'
suffix_nm = GRM.parse_suffix row[:suffix_nm]
title_nm = ETL.parse_string row[:title_nm], length: 20, name: 'TITLE_NM'
nm[:nameCode] = nm_typ_cd unless nm_typ_cd.empty?
nm[:prefix] = prefix_nm unless prefix_nm.empty?
nm[:firstName] = first_nm unless first_nm.empty?
nm[:middleName] = middle_nm unless middle_nm.empty?
nm[:lastName] = last_nm unless last_nm.empty?
nm[:suffix] = suffix_nm unless suffix_nm.empty?
nm[:title] = title_nm unless title_nm.empty?
if (!nm.empty?)
nm[:default] = true
nm[:active] = true
return nm
else
return nil
end
end
def buildExtendedAttributes(row)
ea = {}
visa_type = ETL.parse_string( row[:visa_type], length: 30, name: 'VISA_TYPE' )
county = ETL.parse_string( row[:county], length: 30, name: 'COUNTY' )
age_by_fiscal_year = ETL.parse_integer( row[:age_by_fiscal_year], length: 3, name: 'AGE_BY_FISCAL_YEAR' )
race = ETL.parse_string( row[:race], length: 30, name: 'RACE' )
education_level = ETL.parse_string( row[:education_level], length: 30, name: 'EDUCATION_LEVEL' )
degree = GRM.parse_degree( row[:degree], name: 'DEGREE' )
major = ETL.parse_string( row[:major], length: 30, name: 'MAJOR' )
is_handicapped = ETL.parse_boolean( row[:is_handicapped], name: 'IS_HANDICAPPED' )
handicap_type = ETL.parse_string( row[:handicap_type], length: 30, name: 'HANDICAP_TYPE' )
is_veteran = ETL.parse_boolean( row[:is_veteran], name: 'IS_VETERAN' )
veteran_type = ETL.parse_string( row[:veteran_type], length: 30, name: 'VETERAN_TYPE' )
has_visa = ETL.parse_boolean( row[:has_visa], name: 'HAS_VISA' )
visa_code = ETL.parse_string( row[:visa_code], length: 20, name: 'VISA_CODE' )
visa_renewal_date = ETL.parse_string( row[:visa_renewal_date], length: 19, name: 'VISA_RENEWAL_DATE' )
office_location = ETL.parse_string( row[:office_location], length: 30, name: 'OFFICE_LOCATION' )
secondry_office_location = ETL.parse_string( row[:secondry_office_location], length: 30, name: 'SECONDRY_OFFICE_LOCATION' )
school = ETL.parse_string( row[:school], length: 50, name: 'SCHOOL' )
year_graduated = GRM.parse_year( row[:year_graduated], name: 'YEAR_GRADUATED' )
directory_department = ETL.parse_string( row[:directory_department], length: 30, name: 'DIRECTORY_DEPARTMENT' )
directory_title = ETL.parse_string( row[:directory_title], length: 50, name: 'DIRECTORY_TITLE', strict: false )
primary_title = ETL.parse_string( row[:primary_title], length: 51, name: 'PRIMARY_TITLE', strict: false )
vacation_accural = ETL.parse_boolean( row[:vacation_accural], name: 'VACATION_ACCURAL' )
is_on_sabbatical = ETL.parse_boolean( row[:is_on_sabbatical], name: 'IS_ON_SABBATICAL' )
id_provided = ETL.parse_string( row[:id_provided], length: 30, name: 'ID_PROVIDED' )
id_verified = ETL.parse_string( row[:id_verified], length: 30, name: 'ID_VERIFIED' )
citizenship_type_code = GRM.parse_citizenship_type( row[:citizenship_type_code] )
multi_campus_principal_id = ETL.parse_string( row[:multi_campus_principal_id], length: 40, name: 'MULTI_CAMPUS_PRINCIPAL_ID' )
multi_campus_principal_name = ETL.parse_string( row[:multi_campus_principal_name], length: 100, name: 'MULTI_CAMPUS_PRINCIPAL_NAME' )
salary_anniversary_date = ETL.parse_string( row[:salary_anniversary_date], length: 10, name: 'SALARY_ANNIVERSARY_DATE' )
ea[:visaType] = visa_type unless visa_type.empty?
ea[:county] = county unless county.empty?
ea[:ageByFiscalYear] = age_by_fiscal_year unless age_by_fiscal_year.nil?
ea[:race] = race unless race.empty?
ea[:educationLevel] = education_level unless education_level.empty?
ea[:degree] = degree unless degree.empty?
ea[:major] = major unless major.empty?
ea[:handicapped] = is_handicapped unless is_handicapped.nil?
ea[:handicapType] = handicap_type unless handicap_type.empty?
ea[:veteran] = is_veteran unless is_veteran.nil?
ea[:veteranType] = veteran_type unless veteran_type.empty?
ea[:visa] = has_visa unless has_visa.nil?
ea[:visaCode] = visa_code unless visa_code.empty?
ea[:visaRenewalDate] = visa_renewal_date unless visa_renewal_date.empty?
ea[:officeLocation] = office_location unless office_location.empty?
ea[:secondaryOfficeLocation] = secondry_office_location unless secondry_office_location.empty?
ea[:school] = school unless school.empty?
ea[:yearGraduated] = year_graduated unless year_graduated.empty?
ea[:directoryDepartment] = directory_department unless directory_department.empty?
ea[:directoryTitle] = directory_title unless directory_title.empty?
ea[:primaryTitle] = primary_title unless primary_title.empty?
ea[:vacationAccrual] = vacation_accural unless vacation_accural.nil?
ea[:onSabbatical] = is_on_sabbatical unless is_on_sabbatical.nil?
ea[:idProvided] = id_provided unless id_provided.empty?
ea[:idVerified] = id_verified unless id_verified.empty?
ea[:citizenshipType] = citizenship_type_code unless citizenship_type_code.empty?
ea[:multiCampusPrincipalId] = multi_campus_principal_id unless multi_campus_principal_id.empty?
ea[:multiCampusPrincipalName] = multi_campus_principal_name unless multi_campus_principal_name.empty?
ea[:salaryAnniversaryDate] = salary_anniversary_date unless salary_anniversary_date.empty?
return ea
end
def buildAffiliations(row)
aff = {}
afltn_typ_cd = ETL.parse_string row[:afltn_typ_cd], name: 'AFLTN_TYP_CD', length: 40, required: true
campus = ETL.parse_string row[:campus_cd], name: 'CAMPUS_CD', length: 2
aff[:affiliationType] = afltn_typ_cd unless afltn_typ_cd.empty?
aff[:campus] = campus unless campus.empty?
emp = {}
emp_stat_cd = GRM.parse_emp_stat_cd row[:emp_stat_cd]
emp_typ_cd = GRM.parse_emp_typ_cd row[:emp_typ_cd]
base_slry_amt = ETL.parse_float row[:base_slry_amt], length: 15, name: 'BASE_SLRY_AMT'
prmry_dept_cd = ETL.parse_string row[:prmry_dept_cd], length: 40, name: 'PRMRY_DEPT_CD'
emp_id = ETL.parse_string row[:emp_id], length: 40, name: 'EMP_ID'
emp[:employeeStatus] = emp_stat_cd unless emp_stat_cd.empty?
emp[:employeeType] = emp_typ_cd unless emp_typ_cd.empty?
emp[:baseSalaryAmount] = base_slry_amt unless base_slry_amt.nil?
emp[:primaryDepartment] = prmry_dept_cd unless prmry_dept_cd.empty?
emp[:employeeId] = emp_id unless emp_id.empty?
if (!emp.empty?)
emp[:primaryEmployment] = true
aff[:employment] = [];
aff[:employment].push(emp);
end
if (!aff.empty?)
aff[:default] = true
aff[:active] = true
return aff
else
return nil
end
end
def buildAddress(row)
address = {}
addr_typ_cd = GRM.parse_address_type_code( row[:addr_typ_cd] )
addr_line_1 = ETL.parse_string( row[:addr_line_1], name: 'ADDR_LINE_1', length: 128 )
addr_line_2 = ETL.parse_string( row[:addr_line_2], name: 'ADDR_LINE_2', length: 128 )
addr_line_3 = ETL.parse_string( row[:addr_line_3], name: 'ADDR_LINE_3', length: 128 )
city = ETL.parse_string( row[:city], name: 'CITY', length: 30 )
state_pvc_cd = ETL.parse_string( row[:state_pvc_cd], name: 'STATE_PVC_CD', length: 2 )
postal_cd = ETL.parse_string( row[:postal_cd], name: 'POSTAL_CD', length: 20 )
postal_cntry_cd = ETL.parse_string( row[:postal_cntry_cd], name: 'POSTAL_CNTRY_CD', length: 2 )
address[:addressTypeCode] = addr_typ_cd unless addr_typ_cd.empty?
address[:addressLine1] = addr_line_1 unless addr_line_1.empty?
address[:addressLine2] = addr_line_2 unless addr_line_2.empty?
address[:addressLine3] = addr_line_3 unless addr_line_3.empty?
address[:city] = city unless city.empty?
address[:stateOrProvince] = state_pvc_cd unless state_pvc_cd.empty?
address[:postalCode] = postal_cd unless postal_cd.empty?
address[:country] = postal_cntry_cd unless postal_cntry_cd.empty?
if (!address.empty?)
address[:default] = true
return address
else
return nil
end
end
def buildPhone(row)
ph = {}
phone_typ_cd = GRM.parse_phone_type row[:phone_typ_cd]
phone_nbr = GRM.parse_phone_number row[:phone_nbr]
phone_extn_nbr = ETL.parse_string row[:phone_extn_nbr], length: 8, name: 'PHONE_EXTN_NBR'
phone_cntry_cd = ETL.parse_string row[:phone_cntry_cd], length: 2, name: 'PHONE_CNTRY_CD'
ph[:phoneType] = phone_typ_cd unless phone_typ_cd.empty?
ph[:phoneNumber] = phone_nbr unless phone_nbr.empty?
ph[:extension] = phone_extn_nbr unless phone_extn_nbr.empty?
ph[:country] = phone_cntry_cd unless phone_cntry_cd.empty?
if (!ph.empty?)
ph[:default] = true
ph[:active] = true
return ph
else
return nil
end
end
def buildAppointment(row)
ap = {}
unit_number = ETL.parse_string( row[:unit_number], length: 8, name: 'UNIT_NUMBER' )
appointment_type_code = ETL.parse_string( row[:appointment_type_code], length: 3, name: 'APPOINTMENT_TYPE_CODE' )
job_code = ETL.parse_string( row[:job_code], length: 6, name: 'JOB_CODE' )
salary = ETL.parse_float( row[:salary], length: 15, name: 'SALARY' )
appointment_start_date = ETL.parse_string( row[:appointment_start_date], name: 'APPOINTMENT_START_DATE' )
appointment_end_date = ETL.parse_string( row[:appointment_end_date], name: 'APPOINTMENT_END_DATE' )
job_title = ETL.parse_string( row[:job_title], length: 50, name: 'JOB_TITLE', strict: false )
prefered_job_title = ETL.parse_string( row[:prefered_job_title], length: 51, name: 'PREFERED_JOB_TITLE', strict: false )
ap[:unitNumber] = unit_number unless unit_number.empty?
ap[:appointmentType] = appointment_type_code unless appointment_type_code.empty?
ap[:jobCode] = job_code unless job_code.empty?
ap[:salary] = salary unless salary.nil?
ap[:startDate] = appointment_start_date unless appointment_start_date.empty?
ap[:endDate] = appointment_end_date unless appointment_end_date.empty?
ap[:jobTitle] = job_title unless job_title.empty?
ap[:preferedJobTitle] = prefered_job_title unless prefered_job_title.empty?
return ap.empty? ? nil : ap
end
def buildEmail(row)
em = {}
email_typ_cd = GRM.parse_email_type( row[:email_typ_cd] )
email_addr = GRM.parse_email_address( row[:email_addr] )
em[:emailType] = email_typ_cd unless email_typ_cd.empty?
em[:emailAddress] = email_addr unless email_addr.empty?
if (!em.empty?)
em[:default] = true
em[:active] = true
return em
else
return nil
end
end
def addToListIfNotIncluded(item, list)
if (!item.nil? && list.nil?)
return [item]
elsif (!item.nil? && !list.include?(item))
if (item[:default] && !list.find{ |x| x[:default] }.nil?)
item[:default] = false
end
return list.push(item)
else
return list
end
end
def mergeEmployment(record, newAffiliation)
if (!newAffiliation[:employment].nil?)
newAffiliation[:employment].each do |employmentInfo|
if (record[:employment].nil?)
record[:employment] = []
end
if (!record[:employment].find{ |x| x[:primaryEmployment] }.nil?)
employmentInfo[:primaryEmployment] = false
end
record[:employment].push(employmentInfo)
end
end
end
opt = parse_command_line_options (File.basename $0), ARGF.argv
text_parse_errors = []
recordsByPrincipalId = {}
CSV.open(opt[:csv_filename], opt[:csv_options]) do |csv|
record_count = csv.readlines.count
csv.rewind # go back to first row
csv.find_all do |row|
begin
principalId = GRM.parse_principal_id( row[:prncpl_id] )
record = recordsByPrincipalId.has_key?(principalId) ? recordsByPrincipalId[principalId] : {}
record[:principalId] = principalId
record[:principalName] = GRM.parse_principal_name( row[:prncpl_nm] )
record[:kcExtendedAttributes] = buildExtendedAttributes(row)
newAffiliation = buildAffiliations(row)
if (!newAffiliation.nil? && record[:affiliations].nil?)
record[:affiliations] = []
end
duplicateAffil = record[:affiliations].find{ |affil| affil[:affiliationType] == newAffiliation[:affiliationType] && affil[:campus] == newAffiliation[:campus]};
if (duplicateAffil.nil?)
if (newAffiliation[:default] && !record[:affiliations].find{ |x| x[:default] }.nil?)
newAffiliation[:default] = false
end
mergeEmployment(record, newAffiliation)
record[:affiliations].push(newAffiliation)
else
duplicateAffil[:employment].each do |employmentInfo|
newAffiliation[:employment].each do |newEmployment|
if (employmentInfo.select{ |key, value| key != :primaryEmployment} != newEmployment.select{ |key, value| key != :primaryEmployment})
e = Struct.new(:message).new("WARNING:Duplicate employment info for single affiliation. Skipping extra employment info on #{$.}");
puts e.message;
text_parse_errors.push e;
end
end
end
end
record[:names] = addToListIfNotIncluded(buildName(row), record[:names])
record[:addresses] = addToListIfNotIncluded(buildAddress(row), record[:addresses])
record[:phones] = addToListIfNotIncluded(buildPhone(row), record[:phones])
record[:appointments] = addToListIfNotIncluded(buildAppointment(row), record[:appointments])
record[:emails] = addToListIfNotIncluded(buildEmail(row), record[:emails])
recordsByPrincipalId[principalId] = record
rescue TextParseError => e
puts e.message
text_parse_errors.push e
end
end # row
#puts "#{recordsByPrincipalId}\n";
csv.rewind # go back to first row
File.open(opt[:xml_filename], 'w') do |xml_file|
xml = Builder::XmlMarkup.new target: xml_file, indent: 2
xml.instruct! :xml, encoding: "UTF-8"
xml.hrmanifest "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
"xsi:schemaLocation" => "https://github.com/KualiCo/ce-tech-docs/tree/master/v2_0 https://raw.github.com/rSmart/ce-tech-docs/master/v2_0/hrmanifest.xsd",
xmlns: "https://github.com/rSmart/ce-tech-docs/tree/master/v2_0",
schemaVersion: "2.0",
statusEmailRecipient: opt[:email_recipients],
reportDate: Time.now.iso8601,
recordCount: recordsByPrincipalId.length do |hrmanifest|
hrmanifest.records do |record|
recordsByPrincipalId.each do |principalId, info|
xml.record principalId: principalId,
principalName: info[:principalName] do |record|
record.affiliations do |affiliations|
info[:affiliations].each do |affilInfo|
affiliations.affiliation affilInfo.reject{ |k| k == :employment } do |affiliation|
affilInfo[:employment].each do |employmentInfo|
affiliation.employment employmentInfo
end
end
end
end # affiliations
unless info[:addresses].nil? || info[:addresses].empty?
record.addresses do |addresses|
info[:addresses].each do |addrInfo|
addresses.address addrInfo
end
end # addresses
end
unless info[:names].nil?
record.names do |names|
info[:names].each do |nameInfo|
names.name nameInfo
end
end # names
end
unless info[:phones].nil? || info[:phones].empty?
record.phones do |phones|
info[:phones].each do |phoneInfo|
phones.phone phoneInfo
end
end
end
unless info[:emails].nil? || info[:emails].empty?
record.emails do |emails|
info[:emails].each do |emailInfo|
emails.email emailInfo
end
end # emails
end
record.kcExtendedAttributes info[:kcExtendedAttributes]
unless info[:appointments].nil? || info[:appointments].empty?
record.appointments do |appointments|
info[:appointments].each do |appointmentInfo|
appointments.appointment appointmentInfo
end
end # appointments
end
end # record
end
end # record
end # hrmanifest
end # xml_file
end # csv
def number_of_errors(parse_errors_array)
num_errors = 0
unless parse_errors_array.empty?
parse_errors_array.each do |err|
num_errors += 1 if err.message.start_with? "ERROR"
end
end
return num_errors
end
num_errors = number_of_errors text_parse_errors
if num_errors > 0
if opt[:continue]
puts "\n#{num_errors} errors found and skipped during XML tranformation."
else
puts "\n#{num_errors} errors found and must be corrected. To save only valid records, run with the -c option.\n\n"
File.unlink opt[:xml_filename]
exit 1
end
end
puts "\nXML file written to #{opt[:xml_filename]}\n\n"
exit 1 unless GRM.validate_hr_xml opt[:xml_filename]
# POST the XML file to the server if opt[:url]
if opt[:url]
resource = RestClient::Resource.new( opt[:url], opt[:username], opt[:password] )
resource.post file: File.new(opt[:xml_filename], 'rt'), content_type: 'multipart/form-data', multipart: true
puts "\n"
end