# lib/seek/data/bio_samples.rb
require "libxml"
module Seek
module Data
class BioSamples
# Read-only access to the parsed entities and to the row-indexed lookup tables
# that are filled while a sheet is processed (see #initialize for their defaults).
# :treatments used to be listed twice; the duplicate has been dropped.
attr_reader :investigation, :study, :assay, :assay_class, :assay_type,
            :units, :treatments, :organisms, :strains, :culture_growth_type, :tissue_and_cell_types,
            :specimens, :samples, :specimen_names, :sample_names, :treatments_text,
            :rna_extractions, :sequencing
# Error text (a String) and warning messages (an Array) collected during an import.
attr_accessor :errors, :warnings
# Sets up an importer and, when +xml+ is given, parses it straight away.
#
# file           - object answering #template_name and #content_blob
# xml            - String with the spreadsheet XML; nothing is parsed when nil
# to_populate    - stored in @to_populate; extract_from_document writes to the
#                  database only when it is truthy
# institution_id - optional id used to look up the default institution title
#
# Raises RuntimeError (message = accumulated @errors) when no parser mapping
# matches the template / filename. Unparsable XML is logged and skipped.
def initialize file, xml=nil, to_populate=true, institution_id = nil
  @file = file
  @investigation = nil
  @study = nil
  @assay = nil
  @assay_class = nil
  @assay_type = nil
  @units = {}
  @treatments = {}
  @organisms = {}
  @strains = {}
  @culture_growth_type = nil
  @tissue_and_cell_types = {}
  @specimens = {}
  @samples = {}
  @sample_comments = {}
  @creator = nil
  @errors = ""
  @to_populate = to_populate
  @specimen_names = {}
  @sample_names = {}
  @treatments_text = {}
  @rna_extractions = {}
  @sequencing = {}
  @warnings = [] # bittkomk: missing @warnings caused some errors -- was it supposed to be injected somehow?
  @parser_mapping = nil
  @samples_mapping = nil
  @assay_mapping = nil
  # bittkomk: used when creating vectors of fixed or not-mapped entries; it is
  # updated with the number of rows of mapped entries during parsing
  @num_rows = 1000
  @start_row = 1
  @mock_json_import = {}
  @assay_json = {}
  @institution_title = ""
  @institution_title = Institution.find(institution_id).try(:title) if institution_id

  return unless xml

  begin
    doc = LibXML::XML::Parser.string(xml).parse
  rescue StandardError => e
    # Only ordinary errors are swallowed; signals and exit requests propagate.
    doc = nil
    Rails.logger.warn "Invalid xml encountered. - #{e.message}"
  end
  return unless doc

  template = @file.template_name
  # Interpolation instead of String#+ so a nil institution title cannot raise TypeError.
  Rails.logger.warn "Template = #{template}, Institution name = #{@institution_title}"
  filename = @file.content_blob.original_filename
  parser_mapper = Seek::ParserMapper.new
  template_key = template.downcase
  explicit_template = template_key != "autodetect by filename" && template_key != "none"
  mapping_name = explicit_template ? template_key : parser_mapper.filename_to_mapping_name(filename)
  @parser_mapping = parser_mapper.mapping(mapping_name)

  if @parser_mapping
    @samples_mapping = @parser_mapping[:samples_mapping]
    @assay_mapping = @parser_mapping[:assay_mapping]
    Rails.logger.warn @samples_mapping
    extract_from_document doc
  else
    Rails.logger.warn "No parser mapping found for #{filename}"
    @errors << "No parser mapping found for #{filename}"
    raise @errors
  end
end
private
# Locates the assay and samples sheets in +doc+, extracts their data and,
# when @to_populate is set, writes the result to the database.
#
# doc - parsed XML document (its root must support namespaces.default_prefix=)
#
# Raises RuntimeError (message = @errors) when neither sheet is found or when
# the samples sheet is missing.
def extract_from_document doc
  doc.root.namespaces.default_prefix = "ss"
  template_sheet = @assay_mapping ? find_template_sheet(doc) : nil
  samples_sheet = @samples_mapping ? find_samples_sheet(doc) : nil

  if template_sheet.nil? && samples_sheet.nil?
    @errors << "This #{t('data_file')} does not match the given template."
    raise @errors
  end

  # bittkomk: a missing template sheet is fine, since not all templates
  # contain information for populating assays
  @assay_json = build_assay_mock_json(template_sheet) if template_sheet

  unless samples_sheet
    @errors << "No samples sheet is found."
    raise @errors
  end
  build_all_bio_sample_json samples_sheet

  return unless @to_populate

  Rails.logger.warn "MOCK JSON:"
  # Build the intermediate structure once and reuse it for logging and import
  # (it used to be built, and logged, twice).
  mock_json = build_mock_json_import
  Rails.logger.warn mock_json
  Rails.logger.warn "Populate db"
  populate_db mock_json
end
# Finds the sheet holding the assay information.
#
# The sheet name comes from @assay_mapping[:assay_sheet_name] and is matched
# case-insensitively against the sheet's name attribute; if that fails the
# first-row cells are searched via hunt_for_sheet.
#
# Returns the matching sheet node, or nil when no sheet name is configured or
# nothing matches.
def find_template_sheet doc
  template_sheet_name = @assay_mapping[:assay_sheet_name]
  # Interpolate rather than concatenate: String#+ raised TypeError for a nil
  # name before the nil check below could ever run.
  Rails.logger.warn "template_sheet_name: #{template_sheet_name}"
  return nil unless template_sheet_name

  sheet = doc.find_first("//ss:sheet[translate(@name, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = '#{template_sheet_name.downcase}']")
  sheet = hunt_for_sheet(doc, template_sheet_name) if sheet.nil?
  sheet
end
# Finds the sheet holding the sample information.
#
# The sheet name comes from @samples_mapping[:samples_sheet_name] and is
# matched case-insensitively against the sheet's name attribute; if that fails
# the first-row cells are searched via hunt_for_sheet.
#
# Returns the matching sheet node, or nil when no sheet name is configured or
# nothing matches.
def find_samples_sheet doc
  samples_sheet_name = @samples_mapping[:samples_sheet_name]
  # Interpolate rather than concatenate: String#+ raised TypeError for a nil
  # name before the nil check below could ever run.
  Rails.logger.warn "samples_sheet_name: #{samples_sheet_name}"
  return nil unless samples_sheet_name

  sheet = doc.find_first("//ss:sheet[translate(@name, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = '#{samples_sheet_name.downcase}']")
  sheet = hunt_for_sheet(doc, samples_sheet_name) if sheet.nil?
  sheet
end
# Fallback sheet lookup: returns the first sheet having a first-row cell whose
# content contains +keyword+ (case-insensitive, optionally preceded by double
# quotes and followed by a double quote), or nil when there is none.
#
# doc     - parsed XML document
# keyword - text to look for; it is matched literally
def hunt_for_sheet doc, keyword
  # Escape the keyword so characters such as "." or "(" in a sheet name are not
  # interpreted as regular-expression syntax.
  pattern = /"*#{Regexp.escape(keyword)}".*/i
  doc.find("//ss:sheet").find do |sheet|
    sheet_name = sheet.attributes["name"]
    # NOTE(review): sheet_name is placed inside single quotes in the XPath; a
    # name containing an apostrophe would presumably break the query.
    first_row_cells = sheet.find("//ss:sheet[@name='#{sheet_name}']/ss:rows/ss:row/ss:cell[@row='1']")
    first_row_cells.any? { |cell| cell.content =~ pattern }
  end
end
# Extracts the assay-level fields from +sheet+ according to @assay_mapping.
#
# Returns a Hash with the keys "investigation title", "assay type title",
# "study title", "creator email", "creator last name" and "creator first name";
# a value is nil when the mapped lookup yields no entry.
def build_assay_mock_json sheet
  if @assay_mapping[:parsing_direction] == "horizontal"
    mapped_field_hunter = method(:hunt_for_horizontal_field_value_mapped)
  else
    mapped_field_hunter = method(:hunt_for_field_values_mapped)
    # probing number of rows with data in sheet
    hunt_for_field_values_mapped sheet, @assay_mapping[:probing_column], @assay_mapping, true
  end

  # Each field is looked up once (it used to be looked up twice: once for the
  # presence check and once more to read the value).
  first_value = lambda do |field|
    first_hit = mapped_field_hunter.call(sheet, field, @assay_mapping).first
    first_hit ? first_hit[:value] : nil
  end

  {"investigation title" => first_value.call(:"investigation.title"),
   "assay type title" => first_value.call(:"assay_type.title"),
   "study title" => first_value.call(:"study.title"),
   "creator email" => first_value.call(:"creator.email"),
   "creator last name" => first_value.call(:"creator.last_name"),
   "creator first name" => first_value.call(:"creator.first_name")}
end
# Finds or creates the investigation, study and assay described by +assay_json+
# and relates the resulting assay to @file.
#
# assay_json - Hash read via the keys "investigation title", "assay type title"
#              and "study title"
# filename   - used (up to its first ".") as the assay title; a placeholder
#              title is used when nil
#
# Returns the saved assay. The save! calls raise when a record cannot be saved.
def populate_assay assay_json, filename
investigation_title = assay_json["investigation title"]
assay_type_title = assay_json["assay type title"]
study_title = assay_json["study title"]
# reuse the first investigation with this title that the current user may view
@investigation = Investigation.find_all_by_title(investigation_title).detect{|i|i.can_view? User.current_user}
unless @investigation
@investigation = Investigation.new :title => investigation_title
@investigation.projects = User.current_user.person.projects
@investigation.policy = Policy.private_policy
# `investigation` is the attr_reader for @investigation
investigation.save!
end
#create new assay and study
# an existing study is reused only when the current user may edit it
@study = Study.find_all_by_title(study_title).detect{|s|s.can_edit? User.current_user}
unless @study
@study = Study.new :title => study_title
@study.policy = Policy.private_policy
end
@study.lock!
@study.investigation = @investigation
study.save!
assay_class = AssayClass.where(title: I18n.t('assays.experimental_assay')).first_or_create
# prefer an assay type known to the ontology, otherwise fall back to a suggested type
assay_type_in_ontology = Seek::Ontologies::AssayTypeReader.instance.class_hierarchy.hash_by_label[assay_type_title.downcase]
assay_type = assay_type_in_ontology || SuggestedAssayType.where(label: assay_type_title).first_or_create
# NOTE(review): bare `t` here vs. `I18n.t` three lines above -- confirm `t` is available in this class
assay_title = filename.nil? ? "dummy #{t('assays.assay').downcase}" : filename.split(".").first
# NOTE(review): Assay.all loads every assay and filters in Ruby; could be slow on large installations
@assay = Assay.all.detect{|a|a.title == assay_title && a.study_id == study.id && a.assay_class_id == assay_class.try(:id) && a.assay_type_uri== assay_type.uri.try(:to_s) && a.owner_id == User.current_user.person.id}
unless @assay
@assay = Assay.new :title => assay_title
@assay.policy = Policy.private_policy
end
@assay.lock!
@assay.assay_class = assay_class
@assay.assay_type_uri = assay_type.uri.try(:to_s)
@assay.assay_type_label = assay_type_title
### unknown technology type
#@assay.technology_type_uri = Seek::Ontologies::TechnologyTypeReader.instance.class_hierarchy.hash_by_label
@assay.study = study
@assay.save!
@assay.relate @file
@assay
end
# population of treatments, specimens and samples if to_populate = true
# otherwise we collect just some data for the show data file view
# population of x happens according to this schema:
# * check if x should be added
# * if yes:
# ** get the data out of the sheets (using the parser mapping)
# ** build a nice data structure for passing to the populate_x method
# ** call populate_x method
# *** write data to db if it isn't already there
#
# Reads the mapped columns of +sheet+ (via hunt_for_field_values_mapped and
# @samples_mapping) and fills the row-indexed tables through the
# build_all_*_mock_json helpers. This method itself does not write to the
# database. Note that samples are only processed when :add_specimens is set as
# well, because the samples section is nested inside the specimens section.
def build_all_bio_sample_json sheet
# population order should NOT change, DB is populated only if @to_populate is set to be true
# probing number of rows with data in sheet
hunt_for_field_values_mapped sheet, @samples_mapping[:probing_column], @samples_mapping, true
#populate treatments
if @samples_mapping[:add_treatments]
treatment_concentrations = hunt_for_field_values_mapped sheet, :"treatment.concentration", @samples_mapping
treatment_substances = hunt_for_field_values_mapped sheet, :"treatment.substance", @samples_mapping
treatment_units = hunt_for_field_values_mapped sheet, :"treatment.unit", @samples_mapping
treatment_protocols = hunt_for_field_values_mapped sheet, :"treatment.treatment_protocol", @samples_mapping
treatment_incubation_time = hunt_for_field_values_mapped sheet, :"treatment.incubation_time", @samples_mapping
treatment_incubation_time_unit = hunt_for_field_values_mapped sheet, :"treatment.incubation_time_unit", @samples_mapping
treatment_type = hunt_for_field_values_mapped sheet, :"treatment.type", @samples_mapping
treatment_comments = hunt_for_field_values_mapped sheet, :"treatment.comments", @samples_mapping
Rails.logger.warn "$$$$$$$$$$$$$$ treatment_concentrations #{treatment_concentrations}"
Rails.logger.warn "$$$$$$$$$$$$$$ treatment_substances #{treatment_substances}"
Rails.logger.warn "$$$$$$$$$$$$$$ treatment_units #{treatment_units}"
Rails.logger.warn "$$$$$$$$$$$$$$ treatment_protocols #{treatment_protocols}"
Rails.logger.warn "$$$$$$$$$$$$$$ treatment_incubation_time #{treatment_incubation_time}"
Rails.logger.warn "$$$$$$$$$$$$$$ treatment_incubation_time_unit #{treatment_incubation_time_unit}"
Rails.logger.warn "$$$$$$$$$$$$$$ treatment_type #{treatment_type}"
Rails.logger.warn "$$$$$$$$$$$$$$ treatment_comments #{treatment_comments}"
# combine the per-column vectors into one hash per position; zip pads with nil
# when a later vector is shorter than treatment_protocols
treatment_data = treatment_protocols.zip(treatment_substances, treatment_concentrations, treatment_units, treatment_incubation_time, treatment_incubation_time_unit,
treatment_type, treatment_comments).map do
|protocol, substance, concentration, unit, incubation_time, incubation_time_unit, type, comments|
{:protocol => protocol, :substance => substance, :concentration => concentration, :unit => unit, :incubation_time => incubation_time, :incubation_time_unit => incubation_time_unit,
:type => type, :comments => comments}
end
Rails.logger.warn "$$$$$$$$$$$$ TREATMENT DATA (#{treatment_data.length}) #{treatment_data}"
build_all_treatment_mock_json treatment_data
# preview mode: replace the text summary stored by build_treatment_mock_json
# with a hash of the raw values, keyed by the row of the first entry
unless @to_populate
treatment_data.each do |t|
treatments_hash = {}
t.each {|k, v| treatments_hash[k] = v[:value]} #t.map {|key, value| {key => value[:value]}}
row = t.values.first[:row]
treatments_text[row] = treatments_hash #data.values.join.to_s
end
end
end
#treatment_protocols = hunt_for_field_values sheet, "Treatment"
#treatment_attributes = []
#treatment_attributes = get_attribute_names sheet, treatment_protocols.first.attributes["row"].to_i - 2, treatment_protocols.first.attributes["column"].to_i, "Treatment" unless treatment_protocols.blank?
#treatment_protocols.each do |treatment_protocol|
# if @to_populate
# populate_treatment sheet, treatment_protocol
# end
# set_treatments sheet, treatment_protocol, treatment_attributes
#end
#extract specimen and sample names from the file
#specimen_name_cells = hunt_for_field_values sheet, "Specimen"
#set_specimen_names specimen_name_cells
#sample_name_cells = hunt_for_field_values sheet, "Sample Name"
#set_sample_names sample_name_cells
#@sample_comments = hunt_for_field_values sheet, "Optional"
#populate specimens and samples
#if @to_populate
# specimen_name_cells.each do |specimen|
# populate_specimen sheet, specimen
# end
# sample_name_cells.each do |sample|
# populate_sample sheet, sample
# end
#end
# populate specimens
if @samples_mapping[:add_specimens]
specimen_titles = hunt_for_field_values_mapped sheet, :"specimens.title", @samples_mapping # required
specimen_sexes = hunt_for_field_values_mapped sheet, :"specimens.sex", @samples_mapping
specimen_ages = hunt_for_field_values_mapped sheet, :"specimens.age", @samples_mapping
specimen_age_units = hunt_for_field_values_mapped sheet, :"specimens.age_unit", @samples_mapping
specimen_comments = hunt_for_field_values_mapped sheet, :"specimens.comments", @samples_mapping
organism_titles = hunt_for_field_values_mapped sheet, :"organisms.title", @samples_mapping
strain_titles = hunt_for_field_values_mapped sheet, :"strains.title", @samples_mapping
genotype_titles = hunt_for_field_values_mapped sheet, :"specimens.genotype.title", @samples_mapping
genotype_modifications = hunt_for_field_values_mapped sheet, :"specimens.genotype.modification", @samples_mapping
Rails.logger.warn "$$$$$$$$$$$$$$ specimen_titles #{specimen_titles}"
Rails.logger.warn "$$$$$$$$$$$$$$ specimen_sexes #{specimen_sexes}"
Rails.logger.warn "$$$$$$$$$$$$$$ specimen_ages #{specimen_ages}"
Rails.logger.warn "$$$$$$$$$$$$$$ specimen_age_units #{specimen_age_units}"
Rails.logger.warn "$$$$$$$$$$$$$$ organism_titles #{organism_titles}"
Rails.logger.warn "$$$$$$$$$$$$$$ strain_titles #{strain_titles}"
Rails.logger.warn "$$$$$$$$$$$$$$ genotype_titles #{genotype_titles}"
Rails.logger.warn "$$$$$$$$$$$$$$ genotype_modifications #{genotype_modifications}"
# one hash per position, driven by the (required) specimen titles
specimen_data = specimen_titles.
zip(specimen_sexes, specimen_ages, specimen_age_units,
specimen_comments, organism_titles, strain_titles, genotype_titles, genotype_modifications).
map do |specimen_title, specimen_sex, specimen_age, specimen_age_unit,
specimen_comment, organism_title, strain_title, genotype_title, genotype_modification |
{:specimen_title => specimen_title, :specimen_sex => specimen_sex, :specimen_age => specimen_age, :specimen_age_unit => specimen_age_unit,
:specimen_comment => specimen_comment, :organism_title => organism_title, :strain_title => strain_title,
:genotype_title => genotype_title, :genotype_modification => genotype_modification}
end
Rails.logger.warn "$$$$$$$$$$$$ SPECIMEN DATA (#{specimen_data.length}) #{specimen_data}"
build_all_specimen_mock_json specimen_data
unless @to_populate
specimen_titles.each do |s|
@specimen_names[s[:row]] = s[:value]
end
end
# populate samples
if @samples_mapping[:add_samples]
sample_titles = hunt_for_field_values_mapped sheet, :"samples.title", @samples_mapping
sample_types = hunt_for_field_values_mapped sheet, :"samples.sample_type", @samples_mapping
sample_donation_dates = hunt_for_field_values_mapped sheet, :"samples.donation_date", @samples_mapping
sample_comments = hunt_for_field_values_mapped sheet, :"samples.comments", @samples_mapping
sample_organism_parts = hunt_for_field_values_mapped sheet, :"samples.organism_part", @samples_mapping
tissue_and_cell_types = hunt_for_field_values_mapped sheet, :"tissue_and_cell_types.title", @samples_mapping
sop_titles = hunt_for_field_values_mapped sheet, :"sop.title", @samples_mapping
institution_titles = hunt_for_field_values_mapped sheet, :"institution.title", @samples_mapping
# specimen_titles (from the enclosing specimens section) is zipped in so each sample carries its specimen title
samples_data = sample_titles.zip(sample_types, sample_donation_dates, sample_comments, sample_organism_parts, tissue_and_cell_types, sop_titles, institution_titles, specimen_titles).map do |sample_title, sample_type, sample_donation_date, sample_comment, sample_organism_part, tissue_and_cell_type, sop_title, institution_title, specimen_title|
{:sample_title => sample_title, :sample_type => sample_type, :sample_donation_date => sample_donation_date, :sample_comment => sample_comment, :sample_organism_part => sample_organism_part,
:tissue_and_cell_type => tissue_and_cell_type, :sop_title => sop_title, :institution_title => institution_title, :specimen_title => specimen_title}
end
Rails.logger.warn "$$$$$$$$$$$$$$ samples_comments #{sample_comments}"
Rails.logger.warn "$$$$$$$$$$$$ SAMPLES DATA (#{samples_data.length}) : ##{samples_data}"
build_all_sample_mock_json samples_data
unless @to_populate
sample_titles.each do |s|
@sample_names[s[:row]] = s[:value]
end
end
end
end
#extract RNA and sequencing from the file
#rna_protocols = hunt_for_field_values sheet, "RNA Extraction"
#sequencing_protocols = hunt_for_field_values sheet, "Sequencing"
#rna_attribute_names= []
#sequencing_attribute_names =[]
#rna_attribute_names = get_attribute_names sheet, rna_protocols.first.attributes["row"].to_i, rna_protocols.first.attributes["column"].to_i, "RNA Extraction" unless rna_protocols.blank?
#sequencing_attribute_names = get_attribute_names sheet, sequencing_protocols.first.attributes["row"].to_i, sequencing_protocols.first.attributes["column"].to_i, "Sequencing" unless sequencing_protocols.blank?
#rna_protocols.each do |rna_p|
# set_rna_extractions sheet, rna_p, rna_attribute_names unless rna_p == rna_protocols[0] || rna_p == rna_protocols[1]
#end
#sequencing_protocols.each do |sq|
# set_sequencing sheet, sq, sequencing_attribute_names unless sq == sequencing_protocols[0] || sq == sequencing_protocols[1]
#end
end
# Assembles the intermediate import structure (shaped like the hash map we
# would like to get from a google refine json export).
#
# Returns a Hash with the assay data under "assay" and, under "rows", one entry
# per row index from @start_row through @start_row + @num_rows (inclusive);
# each entry holds the specimen, sample and treatment recorded for that row
# (nil where none was recorded).
def build_mock_json_import
  final_row = @start_row + @num_rows
  row_entries = (@start_row..final_row).map do |row_index|
    {"specimen" => specimens[row_index],
     "sample" => samples[row_index],
     "treatment" => treatments[row_index]}
  end
  import_data = {"assay" => @assay_json, "rows" => row_entries}
  Rails.logger.warn "$$$$$$$$$$$$$$$$$$$$ JSON: #{import_data}"
  import_data
end
# Writes the intermediate hash map produced by build_mock_json_import to the
# database.
#
# The creator is resolved first (from the assay data when both creator names
# are present, otherwise the current user's person); an assay is populated only
# when investigation, assay type and study titles are all present. Then every
# row is processed in the order specimen, sample, treatment: a sample needs a
# specimen, a treatment needs a specimen or a sample.
#
# Returns data["rows"].
def populate_db data
  assay_details = data["assay"]

  has_named_creator = assay_details &&
                      assay_details["creator last name"] &&
                      assay_details["creator first name"] #&& assay_details["creator email"]
  if has_named_creator
    set_creator data["assay"]
  else
    @creator = Person.find(User.current_user.person_id)
  end

  linked_assay = nil
  if assay_details &&
     assay_details["investigation title"] &&
     assay_details["assay type title"] &&
     assay_details["study title"]
    linked_assay = populate_assay data["assay"], @file.content_blob.original_filename
  end

  data["rows"].each do |entry|
    specimen_details = entry["specimen"]
    sample_details = entry["sample"]
    treatment_details = entry["treatment"]

    specimen_record = specimen_details ? populate_specimen(specimen_details) : nil
    sample_record = nil
    if sample_details && specimen_record
      sample_record = populate_sample sample_details, specimen_record, linked_assay
    end
    next unless treatment_details && (specimen_record || sample_record)
    populate_treatment treatment_details, specimen_record, sample_record
  end
end
# Resolves @creator from the creator names in +assay_json+.
#
# The person is looked up by first and last name only. When nobody matches, a
# warning is appended to @warnings and the current user's person is used.
def set_creator assay_json
  creator_first_name = assay_json["creator first name"]
  creator_last_name = assay_json["creator last name"]
  creator_email = assay_json["creator email"]
  creator_name = "#{creator_first_name} #{creator_last_name}"

  #@creator = Person.find_by_first_name_and_last_name_and_email(creator_first_name,creator_last_name,creator_email)
  @creator = Person.find_by_first_name_and_last_name(creator_first_name, creator_last_name)
  return if @creator

  @warnings << "Warning: Person #{creator_name}(#{creator_email}) cannot be found. Please register in SEEK. Will use uploader as creator.<br/>"
  @creator = Person.find(User.current_user.person_id)
end
# Collects the cells of one treatment row into @treatments_text.
#
# The cells are taken from the protocol cell's row, from the protocol cell's
# column up to the end column reported for the table following "Treatment".
# Each value (double quotes removed) is stored under the attribute name found
# at the cell's column index. Returns the hash stored for that row.
def set_treatments sheet, treatment_protocol, treatment_attribute_names # not used anymore
  name_of_sheet = sheet.attributes["name"]
  target_row = treatment_protocol.attributes["row"].to_i
  first_col = treatment_protocol.attributes["column"].to_i
  last_col = get_end_column sheet, get_next_table_name(sheet, "Treatment")

  values_by_attribute = {}
  sheet.find("//ss:sheet[@name='#{name_of_sheet}']/ss:rows/ss:row/ss:cell[@row = #{target_row} and @column >= #{first_col} and @column <= #{last_col}]").each do |cell|
    column_index = cell.attributes["column"].to_i
    values_by_attribute[treatment_attribute_names[column_index]] = cell.content.tr('""', "")
  end
  @treatments_text[target_row] = values_by_attribute
end
# Collects the cells of one RNA extraction row into @rna_extractions.
#
# The cells are taken from the protocol cell's row, from the protocol cell's
# column up to the end column reported for the table following "RNA Extraction".
# Each value (double quotes removed) is stored under the attribute name found
# at the cell's column index. Returns the hash stored for that row.
def set_rna_extractions sheet, rna_extraction_protocol, rna_attribute_names # not used anymore
  name_of_sheet = sheet.attributes["name"]
  target_row = rna_extraction_protocol.attributes["row"].to_i
  first_col = rna_extraction_protocol.attributes["column"].to_i
  last_col = get_end_column sheet, get_next_table_name(sheet, "RNA Extraction")

  values_by_attribute = {}
  sheet.find("//ss:sheet[@name='#{name_of_sheet}']/ss:rows/ss:row/ss:cell[@row = #{target_row} and @column >= #{first_col} and @column <= #{last_col}]").each do |cell|
    column_index = cell.attributes["column"].to_i
    values_by_attribute[rna_attribute_names[column_index]] = cell.content.tr('""', "")
  end
  @rna_extractions[target_row] = values_by_attribute
end
# Collects the cells of one sequencing row into @sequencing.
#
# The cells are taken from the protocol cell's row, from the protocol cell's
# column up to the end column reported for the table following "Sequencing".
# Each value (double quotes removed) is stored under the attribute name found
# at the cell's column index. Returns the hash stored for that row.
def set_sequencing sheet, sequencing_protocol, sequencing_attribute_names # not used anymore
  name_of_sheet = sheet.attributes["name"]
  target_row = sequencing_protocol.attributes["row"].to_i
  first_col = sequencing_protocol.attributes["column"].to_i
  last_col = get_end_column sheet, get_next_table_name(sheet, "Sequencing")

  values_by_attribute = {}
  sheet.find("//ss:sheet[@name='#{name_of_sheet}']/ss:rows/ss:row/ss:cell[@row = #{target_row} and @column >= #{first_col} and @column <= #{last_col}]").each do |cell|
    column_index = cell.attributes["column"].to_i
    values_by_attribute[sequencing_attribute_names[column_index]] = cell.content.tr('""', "")
  end
  @sequencing[target_row] = values_by_attribute
end
# Registers every entry of +treatment_data_list+ through
# build_treatment_mock_json, in order. Returns the list itself.
def build_all_treatment_mock_json treatment_data_list
  treatment_data_list.each { |entry| build_treatment_mock_json entry }
end
# Converts one zipped treatment entry into the intermediate treatment hash.
#
# treatment_data - Hash whose values are cell hashes read via [:value]; the
#                  :protocol cell also supplies the [:row] used as table key
#
# Stores the hash in @treatments and a short text summary in @treatments_text
# (both keyed by row), logs it and returns the hash.
def build_treatment_mock_json treatment_data
  cell_value = lambda { |field| treatment_data[field][:value] }

  protocol_text = cell_value.call(:protocol)
  compound_name = cell_value.call(:substance)
  start_amount = cell_value.call(:concentration)
  unit_symbol = cell_value.call(:unit)
  incubation = cell_value.call(:incubation_time)
  incubation_unit = cell_value.call(:incubation_time_unit)
  kind = cell_value.call(:type)
  remarks = cell_value.call(:comments)
  sheet_row = treatment_data[:protocol][:row]

  entry = {
    "type" => kind,
    "start value" => start_amount,
    "end value" => nil,
    "unit" => unit_symbol,
    "standard deviation" => nil,
    "comments" => remarks,
    "protocol" => protocol_text,
    "incubation time" => incubation,
    "incubation time unit" => incubation_unit,
    "compound" => compound_name
  }
  @treatments[sheet_row] = entry
  @treatments_text[sheet_row] = "Treatment Protocol:#{protocol_text}, Unit:#{unit_symbol}, Concentration:#{start_amount}, Substance:#{compound_name}"
  Rails.logger.warn "add treatment, row = #{sheet_row} : #{entry}"
  entry
end
# Finds or creates the Treatment described by +treatment_json+ and attaches it
# to the given specimen and/or sample.
#
# treatment_json - Hash read via the keys "type", "compound", "start value",
#                  "end value", "standard deviation", "comments", "protocol",
#                  "unit", "incubation time" and "incubation time unit"
# specimen       - optional specimen the treatment belongs to
# sample         - optional sample the treatment belongs to
#
# Missing lookup records (measured item, compound, units) are created on the
# fly. Returns the saved treatment; save! raises when it cannot be saved.
def populate_treatment treatment_json, specimen=nil, sample=nil
start_value = treatment_json["start value"]
end_value = treatment_json["end value"]
standard_deviation = treatment_json["standard deviation"]
comments = treatment_json["comments"]
protocol = treatment_json["protocol"]
treatment = nil
#if start_value && start_value != ""
# NOTE(review): a nil "type" raises NoMethodError on downcase here
treatment_type = MeasuredItem.find_by_title(treatment_json["type"].downcase)
treatment_type = MeasuredItem.create :title => treatment_json["type"].downcase, :factors_studied => false unless treatment_type
compound = nil
# a compound is only resolved for treatments of type "concentration"
if treatment_type.title == "concentration"
compound_name = treatment_json["compound"]
if compound_name != "" && compound_name != "none"
compound = Compound.find_by_name(treatment_json["compound"].downcase)
compound = Compound.create :name => treatment_json["compound"].downcase unless compound
end
end
incubation_time = treatment_json["incubation time"]
incubation_time_unit = nil
# the incubation unit is only looked up / created when an incubation time is given
if incubation_time && incubation_time != ""
incubation_time_unit = Unit.find_by_symbol treatment_json["incubation time unit"]
incubation_time_unit = Unit.create :symbol => treatment_json["incubation time unit"], :factors_studied => false unless incubation_time_unit
end
unit = Unit.find_by_symbol treatment_json["unit"]
unit = Unit.create :symbol => treatment_json["unit"], :factors_studied => false unless unit
#treatment = Treatment.find(:first, :conditions => ["treatment_protocol = ? and unit_id = ? and substance = ? and cast(concentration as char) = ?", treatment_json["protocol"], unit.id, treatment_json["compound"], treatment_json["start value"]])
#treatment = Treatment.new :substance => treatment_json["compound"], :concentration => treatment_json["start value"], :unit_id => unit.id, :treatment_protocol => treatment_json["protocol"] unless treatment
# NOTE(review): a nested def (re)defines nil_or_float as a method on the
# enclosing class every time this method runs; a lambda or a private helper
# would avoid that, but other code may already rely on the method existing
def nil_or_float o
o.nil? ? nil : o.to_f
end
#rails 3
#treatment = Treatment.where(["treatment_protocol = ? and unit_id = ? and substance = ? and cast(concentration as char) = ?", treatment_protocol, unit.id, substance, concentration]).first
# look for an identical existing treatment; `<=>` in the SQL looks like MySQL's
# NULL-safe equality operator -- TODO confirm the target database
treatment = Treatment.find(:first, :conditions => ["unit_id <=> ? and treatment_protocol <=> ? and treatment_type_id <=> ? and cast(start_value as char) <=> ? and cast(end_value as char) <=> ? and
cast(standard_deviation as char) <=> ? and comments <=> ? and cast(incubation_time as char) <=> ? and incubation_time_unit_id <=> ? and compound_id <=> ? and specimen_id <=> ?",
unit, protocol, treatment_type, nil_or_float(start_value), nil_or_float(end_value), nil_or_float(standard_deviation), comments, nil_or_float(incubation_time), incubation_time_unit, compound, specimen])
treatment = Treatment.new :treatment_type => treatment_type, :start_value => start_value, :end_value => end_value, :unit => unit, :standard_deviation => standard_deviation,
:comments => comments, :treatment_protocol => protocol, :incubation_time => incubation_time, :incubation_time_unit => incubation_time_unit,
:compound => compound, :specimen => specimen, :sample => sample unless treatment
treatment.save!
# attach the treatment to specimen / sample unless it is already linked
if specimen and !specimen.treatments.include? treatment
specimen.treatments << treatment
end
if sample and !sample.treatments.include? treatment
sample.treatments << treatment
end
#end
treatment
end
# Registers every entry of +specimen_data_list+ through
# build_specimen_mock_json, in order. Returns the list itself.
def build_all_specimen_mock_json specimen_data_list
  specimen_data_list.each { |entry| build_specimen_mock_json entry }
end
# Converts one zipped specimen entry into the intermediate specimen hash.
#
# specimen_data - Hash whose values are cell hashes read via [:value]; the
#                 :specimen_title cell also supplies the [:row] used as key
#
# The age is converted with to_i. Stores the hash in @specimens and the title
# in @specimen_names (both keyed by row), logs it and returns the hash.
def build_specimen_mock_json specimen_data
  cell_value = lambda { |field| specimen_data[field][:value] }

  title = cell_value.call(:specimen_title)
  sex_label = cell_value.call(:specimen_sex)
  organism_name = cell_value.call(:organism_title)
  strain_name = cell_value.call(:strain_title)
  age_number = cell_value.call(:specimen_age).to_i
  age_unit_label = cell_value.call(:specimen_age_unit)
  remarks = cell_value.call(:specimen_comment)
  gene_title = cell_value.call(:genotype_title)
  gene_modification = cell_value.call(:genotype_modification)
  sheet_row = specimen_data[:specimen_title][:row]

  entry = {
    "title" => title,
    "organism" => organism_name,
    "strain" => strain_name,
    "sex" => sex_label,
    "age" => age_number,
    "age unit" => age_unit_label,
    "comments" => remarks,
    "genotype title" => gene_title,
    "genotype modification" => gene_modification
  }
  @specimens[sheet_row] = entry
  @specimen_names[sheet_row] = title
  Rails.logger.warn "add specimen, row = #{sheet_row} : #{entry}"
  entry
end
# Finds or creates the specimen described by +specimen_json+, together with
# its organism, strain and (unless the genotype title is "none") genotype.
#
# specimen_json - Hash read via the keys "title", "sex", "age", "age unit",
#                 "organism", "strain", "comments", "genotype title" and
#                 "genotype modification"
# row           - optional sheet row, only used in warning messages. The
#                 warnings used to reference `row` / `row_num`, which are
#                 neither locals nor parameters here, so building them raised
#                 NameError; the row is now an optional argument and is left
#                 out of the text when unknown.
#
# An existing specimen with the same title is reused when organism, strain,
# sex, age and age unit all match; otherwise a renamed copy is saved and a
# warning is recorded in @warnings. Returns the specimen that was used.
def populate_specimen specimen_json, row = nil
  specimen_title = specimen_json["title"]
  age = specimen_json["age"]
  age_unit = specimen_json["age unit"]
  organism_title = specimen_json["organism"]
  strain_title = specimen_json["strain"]
  comments = specimen_json["comments"]
  genotype_title = specimen_json["genotype title"]
  genotype_modification = specimen_json["genotype modification"]
  # "unknown" and any unrecognised label are stored as nil
  sex = case specimen_json["sex"]
        when "male" then 0
        when "female" then 1
        when "hermaphrodite" then 2
        end
  row_text = row ? " in row #{row}" : ""

  organism = Organism.find_by_title organism_title
  strain = Strain.find_by_title strain_title
  culture_growth_type = CultureGrowthType.find_by_title "in vivo"
  unless organism
    organism = Organism.new :title => organism_title
    organism.save!
  end
  strain = Strain.new :title => strain_title unless strain
  strain.organism = organism
  strain.projects = @file.projects
  strain.save!

  specimen = Specimen.find_by_title specimen_title
  institution = Institution.find_by_title @institution_title
  if specimen.nil?
    specimen = Specimen.new :title => specimen_title, :lab_internal_number => specimen_title
    specimen.sex = sex
    specimen.age = age
    specimen.age_unit = age_unit
    specimen.institution = institution #@creator.institutions.first if @creator
    specimen.strain = strain
    specimen.culture_growth_type = culture_growth_type
    specimen.policy = @file.policy.deep_copy
    specimen.projects = @file.projects
    specimen.comments = comments
    specimen.creators << @creator
    specimen.save!
  elsif specimen.organism == organism &&
        specimen.strain == strain &&
        specimen.sex == sex &&
        specimen.age == age &&
        specimen.age_unit == age_unit
    # identical specimen already stored: reuse it, but tell the user when it is hidden from them
    unless specimen.can_view?(User.current_user)
      @warnings << "Warning: #{t('biosamples.sample_parent_term')} with the name '#{h(specimen_title)}'#{row_text} is already created in SEEK.<br/>".html_safe
      @warnings << "But you are not authorized to view it. You can contact '#{h(specimen.contributor.person.name)} for authorizations'<br/>".html_safe
    end
  else
    # same title but different attributes: save a copy under a timestamped title.
    # The one-second pause presumably keeps the timestamps of successive copies distinct.
    sleep(1)
    new_sp = specimen.dup
    now = Time.now
    new_sp.title = "#{specimen_title}-#{now}"
    new_sp.contributor = User.current_user
    new_sp.projects = specimen.projects
    new_sp.created_at = now
    new_sp.policy = @file.policy.deep_copy
    new_sp.save!
    @warnings << "Warning: #{t('biosamples.sample_parent_term')} with the name '#{h(specimen_title)}'#{row_text} is already created in SEEK.<br/>".html_safe
    @warnings << "It is renamed and saved as '#{h(new_sp.title)}'.<br/>".html_safe
    @warnings << "You may rename it and upload the file as new version!<br/>".html_safe
    Rails.logger.warn @warnings
    specimen = new_sp
  end

  unless genotype_title == "none"
    gene = Gene.find_by_title genotype_title
    gene = Gene.new :title => genotype_title, :symbol => genotype_title unless gene
    gene.save!
    modification = Modification.find_by_title genotype_modification
    modification = Modification.new :title => genotype_modification, :symbol => genotype_modification unless modification
    modification.save!
    # NOTE(review): the lookup filters on strain_id but a new genotype is created
    # without one, so a freshly created genotype may never be found again -- confirm intent
    genotype = Genotype.where(["gene_id = ? and modification_id = ? and specimen_id = ? and strain_id = ?", gene.id, modification.id, specimen.id, strain.id]).first
    genotype = Genotype.new :gene_id => gene.id, :modification_id => modification.id, :specimen_id => specimen.id unless genotype
    genotype.save!
  end
  specimen
end
# Registers every entry of +sample_data_list+ through build_sample_mock_json,
# in order. Returns the list itself.
def build_all_sample_mock_json sample_data_list
  sample_data_list.each { |entry| build_sample_mock_json entry }
end
# Converts one zipped sample entry into the intermediate sample hash.
#
# sample_data - Hash whose values are cell hashes read via [:value]; the
#               :sample_title cell also supplies the [:row] used as table key
#
# The donation date is stored as a String (to_s). Stores the hash in @samples
# and the title in @sample_names (both keyed by row), logs the row and returns
# the hash.
def build_sample_mock_json sample_data
  cell_value = lambda { |field| sample_data[field][:value] }

  title = cell_value.call(:sample_title)
  type_label = cell_value.call(:sample_type)
  tissue_title = cell_value.call(:tissue_and_cell_type)
  sop_name = cell_value.call(:sop_title)
  donated_on = cell_value.call(:sample_donation_date)
  institution_name = cell_value.call(:institution_title)
  remarks = cell_value.call(:sample_comment)
  organism_part_label = cell_value.call(:sample_organism_part)
  sheet_row = sample_data[:sample_title][:row]

  entry = {
    "title" => title,
    "type" => type_label,
    "tissue and cell type" => tissue_title,
    "sop" => sop_name,
    "donation date" => donated_on.to_s,
    "institution" => institution_name,
    "comments" => remarks,
    "organism part" => organism_part_label
  }
  @samples[sheet_row] = entry
  @sample_names[sheet_row] = title
  Rails.logger.warn "add sample, row = #{sheet_row}"
  entry
end
# Finds or creates the Sample described by sample_json and links it to the
# given assay, or to @file when no assay is supplied.
#
# sample_json - string-keyed Hash with the keys "title", "type",
#               "tissue and cell type", "sop", "donation date" (a String),
#               "institution", "comments" and "organism part".
# specimen    - specimen attached to a newly created sample and compared
#               against the specimen of an already existing one.
# assay       - optional assay the sample is added to.
#
# Side effects: may create a TissueAndCellType and a Sample, may rename an
# existing Sample whose attributes differ from the spreadsheet, and appends
# messages to @warnings.
#
# Returns the Sample.
def populate_sample(sample_json, specimen, assay = nil)
  sample_title = sample_json["title"]
  sample_type = sample_json["type"]
  tissue_and_cell_type_title = sample_json["tissue and cell type"]
  sop_title = sample_json["sop"]
  donation_date = sample_json["donation date"] + " UTC +00.00"
  institution_title = sample_json["institution"]
  comments = sample_json["comments"]
  organism_part = sample_json["organism part"]

  sop_title = nil if sop_title == "NO STORAGE"
  # fall back to the institution chosen for the whole import
  institution_title = @institution_title if institution_title.nil? || institution_title == ""

  tissue_and_cell_type = TissueAndCellType.find_by_title tissue_and_cell_type_title
  if tissue_and_cell_type.nil? && tissue_and_cell_type_title && tissue_and_cell_type_title != ""
    tissue_and_cell_type = TissueAndCellType.create :title => tissue_and_cell_type_title
    tissue_and_cell_type.save!
  end

  sop = Sop.find_by_title sop_title
  institution = Institution.find_by_title institution_title
  # parsed once; every path below needs the value
  donated_at = Time.zone.parse(donation_date).utc

  sample = Sample.find_by_title sample_title
  if sample.nil?
    sample = Sample.new :title => sample_title,
                        :lab_internal_number => sample_title
    sample.projects = @file.projects
    sample.sample_type = sample_type
    sample.donation_date = donated_at
    sample.institution = institution
    if tissue_and_cell_type.try(:id) && !sample.tissue_and_cell_types.include?(tissue_and_cell_type)
      sample.tissue_and_cell_types << tissue_and_cell_type
    end
    sample.associate_sop sop if sop
    sample.specimen = specimen if specimen
    sample.organism_part = organism_part if organism_part != ""
    sample.comments = comments
    sample.treatment = "" # will be linked to 0 ... n treatments anyway
    sample.policy = @file.policy.deep_copy
    sample.creators << @creator
    sample.save!
  else
    tissue_matches = sample.tissue_and_cell_types.member?(tissue_and_cell_type) || tissue_and_cell_type_title == ""
    unchanged = sample.specimen == specimen &&
                sample.sample_type == sample_type &&
                tissue_matches &&
                sample.donation_date == donated_at &&
                sample.institution == institution &&
                sample.comments == comments

    if !unchanged
      # NOTE(review): the pause presumably keeps the timestamp suffix distinct
      # between consecutive renames -- confirm before removing.
      sleep(1)
      sample.title = "#{sample_title}-#{Time.now}"
      sample.save!
      Rails.logger.warn "sample update-branch"
      Rails.logger.warn "#{sample.specimen} == #{specimen} ? #{sample.specimen == specimen}"
      Rails.logger.warn "#{sample.sample_type} == #{sample_type} ? #{sample.sample_type == sample_type}"
      Rails.logger.warn "(sample.tissue_and_cell_types.member?(tissue_and_cell_type) || tissue_and_cell_type_title == \"\") ? #{tissue_matches}"
      Rails.logger.warn "#{sample.donation_date} == #{donated_at} ? #{sample.donation_date == donated_at}"
      Rails.logger.warn "#{sample.institution} == #{institution} ? #{sample.institution == institution}"
      Rails.logger.warn "#{sample.comments} == #{comments} ? #{sample.comments == comments}"
      # titles come from the uploaded spreadsheet, so they are escaped before
      # the message is marked html_safe
      @warnings << "Warning: sample with the name '#{h(sample_title)}' is already created in SEEK.".html_safe
      @warnings << "It is renamed and saved as '#{h(sample.title)}'.<br/>".html_safe
      @warnings << "You may rename it and upload the file as new version!<br/>".html_safe
    elsif !sample.can_view?(User.current_user)
      # the row is not passed to this method; recover it from the row => title
      # index and leave it out of the message when the title is not registered
      row = @sample_names.key(sample_title)
      row_hint = row ? " in row #{row}" : ""
      @warnings << "Warning: Sample with the name '#{h(sample_title)}'#{row_hint} is already created in SEEK.<br/>".html_safe
      @warnings << "But you are not authorized to view it. You can contact '#{h(sample.contributor.person.name)} for authorizations'<br/>".html_safe
    end
  end

  if assay
    unless assay.samples.include?(sample)
      assay.samples << sample
      assay.save!
    end
  else
    Rails.logger.warn "no #{t('assays.assay').downcase} defined for samples"
    @file.lock!
    unless @file.samples.include?(sample)
      @file.samples << sample
      @file.save!
    end
  end
  sample
end
# Records the content of every given cell in @sample_names, keyed by the
# integer value of the cell's "row" attribute.
#
# Returns sample_name_cells (the result of #each).
def set_sample_names(sample_name_cells) # not used anymore
  sample_name_cells.each do |cell|
    @sample_names[cell.attributes["row"].to_i] = cell.content
  end
end
# Records the content of every given cell in @specimen_names, keyed by the
# integer value of the cell's "row" attribute.
#
# Returns specimen_name_cells (the result of #each).
def set_specimen_names(specimen_name_cells) # not used anymore
  specimen_name_cells.each do |cell|
    @specimen_names[cell.attributes["row"].to_i] = cell.content
  end
end
# Looks up a "label | value" pair laid out horizontally: finds the first cell
# of the sheet whose content equals field_name (case-insensitively) and
# returns the content of the cell one column to its right in the same row.
#
# Returns nil when the label cell or its right-hand neighbour does not exist.
def hunt_for_horizontal_field_value(sheet, field_name)
  sheet_name = sheet.attributes["name"]
  cell_path = "//ss:sheet[@name='#{sheet_name}']/ss:rows/ss:row/ss:cell"

  label_cell = sheet.find(cell_path).find do |cell|
    cell.content.downcase == field_name.downcase
  end
  return nil if label_cell.nil?

  row = label_cell.attributes["row"].to_i
  col = label_cell.attributes["column"].to_i
  value_cell = sheet.find("#{cell_path}[@row=#{row} and @column=#{col + 1}]").first
  value_cell.nil? ? nil : value_cell.content
end
# hunts for a vector of field values given a field name (= header of a column)
# offset of the first data row in respect of header row is calculated using :data_row_offset given in @parser_mapping
# probing_num_rows = true means that the number of non-blank rows contained in a specified probing column is used to get the correct value for @num_rows
# for this purpose the probing column should not have any blank rows in between
def hunt_for_field_values(sheet, field_name, probing_num_rows = false)
  sheet_name = sheet.attributes["name"]
  cell_path = "//ss:sheet[@name='#{sheet_name}']/ss:rows/ss:row/ss:cell"

  # the header is the first cell whose content equals field_name, ignoring case
  header_cell = sheet.find(cell_path).find do |cell|
    cell.content.downcase == field_name.downcase
  end
  return nil if header_cell.nil?

  header_row = header_cell.attributes["row"].to_i
  col = header_cell.attributes["column"].to_i
  # data_row_offset means "add this number to the header row to get to the
  # first data row"; the xpath below uses a strict "@row >", hence the -1
  start_row = header_row + @parser_mapping[:data_row_offset] - 1

  if probing_num_rows
    # probing: take every non-blank cell below the header, however many
    filled_cells = sheet.find("#{cell_path}[@row > #{start_row} and @column=#{col}]").reject do |cell|
      cell.content.blank?
    end
    @start_row = start_row + 1 # first row that holds data
    filled_cells
  else
    # not probing: @num_rows is assumed to hold the correct value already
    sheet.find("#{cell_path}[@row > #{start_row} and @row <= #{start_row + @num_rows} and @column=#{col}]").map { |cell| cell }
  end
end
# Mapped variant of hunt_for_horizontal_field_value: the label to search for
# is mapping[field_name][:column], and the value found is passed through the
# callable stored in mapping[field_name][:value].
#
# Returns an Array of {:value => ...} hashes, one per element of the converted
# result (Array() is applied, so a scalar yields one hash and nil yields none).
def hunt_for_horizontal_field_value_mapped(sheet, field_name, mapping)
  field_mapping = mapping[field_name]
  raw_value = hunt_for_horizontal_field_value(sheet, field_mapping[:column])
  converted = field_mapping[:value].call(raw_value)
  Array(converted).map { |value| { :value => value } }
end
# this is the most important method to get data out of the spreadsheet
# basically it's just a wrapper for hunt_for_field_values using a mapping to get the correct field name (= column header) to extract data from
# the received data is mapped to an array of hashes containing :value and :row -- the value assigned to :value is calculated using the block specified in the mapping for this field name
# if there are fewer rows in the result than specified by @num_rows then the missing rows are augmented with some default value (see augment_missing_rows)
# if there are no results returned by hunt_for_field_values then this case is handled differently for columns that are specified as FIXED in the mapping and for columns that are not
# in any case it is ensured that the method returns @num_rows hashes of :value and :row
def hunt_for_field_values_mapped(sheet, field_name, mapping, probing_num_rows = false)
  cells = hunt_for_field_values sheet, mapping[field_name][:column], probing_num_rows

  if !cells || cells.empty?
    # nothing found in the sheet: synthesise @num_rows entries starting at
    # @start_row, filled with the FIXED value or with an empty string
    if mapping[field_name][:column] == "FIXED"
      filler = mapping[field_name][:value].call()
    else
      Rails.logger.warn "Warning, empty field values list for field_name = #{field_name} returned!"
      filler = ""
    end
    return Array.new(@num_rows) { |offset| { :value => filler, :row => @start_row + offset } }
  end

  # a probing column defines how many data rows there are
  @num_rows = cells.length if probing_num_rows

  converter = mapping[field_name][:value]
  cells.map! do |cell|
    # double quotes are stripped from the cell content before conversion
    { :value => converter.call(cell.content.delete('"')), :row => cell.attributes["row"].to_i }
  end
  return cells unless cells.length < @num_rows

  # converting "" lets a mapping defined as proc {"something"} supply the
  # default for the rows that are missing
  augment_missing_rows cells, converter.call("")
end
# this adds missing rows to the array field_values given an expected number of rows as specified in @num_rows
# added rows contain the right row number and some default value
# finally the whole array is re-sorted by :row to ensure that field_values is ordered by row number
def augment_missing_rows(field_values, default_value = "")
  # index the rows that already have a value so each lookup is O(1)
  present_rows = {}
  field_values.each { |fv| present_rows[fv[:row]] = true }

  # The data rows are @start_row up to, but not including,
  # @start_row + @num_rows. An inclusive range would add one surplus row and
  # return @num_rows + 1 entries.
  (@start_row...(@start_row + @num_rows)).each do |row|
    next if present_rows[row]
    # appended in place, as before, so callers holding the array see the rows
    field_values << { :value => default_value, :row => row }
  end

  field_values.sort_by { |fv| fv[:row] }
end
# Collects the contents of the cells in the given row, from column col up to
# the end column of the table called table_name (as computed by
# get_end_column for the table that follows it).
#
# Returns a Hash mapping column number (Integer) to cell content.
def get_attribute_names(sheet, row, col, table_name) # still used?
  sheet_name = sheet.attributes["name"]
  end_col = get_end_column sheet, get_next_table_name(sheet, table_name)
  header_cells = sheet.find("//ss:sheet[@name='#{sheet_name}']/ss:rows/ss:row/ss:cell[@row = #{row} and @column >= #{col} and @column <= #{end_col}]")

  header_cells.each_with_object({}) do |cell, names|
    names[cell.attributes["column"].to_i] = cell.content
  end
end
# Determines the last column that belongs to a table.
#
# sheet           - sheet node to search.
# next_table_name - name of the table that follows, or nil when there is none.
#
# With a next_table_name the result is the column just before the first cell
# whose content contains that name (case-insensitive), or nil when no such
# cell exists. Without one the result is the column of the last cell in row 1.
def get_end_column(sheet, next_table_name = nil) # still used?
  sheet_name = sheet.attributes["name"]

  if next_table_name
    # the name is literal cell text, so it is escaped before going into the
    # pattern; otherwise e.g. parentheses would be read as a regex group
    pattern = /#{Regexp.escape(next_table_name)}/i
    next_table_cell = sheet.find("//ss:sheet[@name='#{sheet_name}']/ss:rows/ss:row/ss:cell").find do |cell|
      cell.content.match(pattern)
    end
    next_table_cell && next_table_cell.attributes["column"].to_i - 1
  else
    # to_a is needed here: a block-less #collect returns an Enumerator, which
    # has no #last. try_block is kept as the guard for an empty row 1
    # (presumably it swallows the NoMethodError on nil -- verify against its
    # definition).
    try_block do
      sheet.find("//ss:sheet[@name='#{sheet_name}']/ss:rows/ss:row/ss:cell[@row = 1 and @column > 0]").to_a.last.attributes["column"].to_i
    end
  end
end
# Returns the name of the table that follows current_table_name. Table names
# are the non-blank cells of row 3, in the order the sheet yields them.
#
# Returns nil when current_table_name is not among them or is the last one.
# nil is what get_end_column accepts for "there is no next table".
def get_next_table_name(sheet, current_table_name) # still used?
  sheet_name = sheet.attributes["name"]
  table_name_row = 3
  table_names = sheet.find("//ss:sheet[@name='#{sheet_name}']/ss:rows/ss:row/ss:cell[@row = #{table_name_row} and @column > 0]").reject do |cell|
    cell.content.blank?
  end

  index = table_names.index { |cell| cell.content == current_table_name }
  return nil if index.nil?

  next_cell = table_names[index + 1]
  next_cell && next_cell.content
end
end
end
end