# lib/genome/importers/entrez/entrez_gene_pathway_importer.rb
module Genome
  module Importers
    module Entrez
      # Imports gene-gene interaction claims from an Entrez gene pathway TSV file.
      #
      # Every data row names two Entrez gene ids. When both ids resolve to a
      # preloaded gene and the two genes differ, an interaction claim is
      # recorded in both directions through the underlying importer.
      class EntrezGenePathwayImporter
        # Number of lines between two progress messages.
        PROGRESS_INTERVAL = 10_000

        # Builds the importer and preloads the Entrez-id => gene lookup table.
        #
        # @param tsv_path [String] path of the TSV file to read in {#import!}
        def initialize(tsv_path)
          @tsv_path = tsv_path
          @entrez_id_hash = {}
          @importer = Genome::Importers::Importer.new(nil, source)
          preload_genes
        end

        # Reads the TSV file line by line, records an interaction for every
        # usable row and finally asks the underlying importer to store them.
        #
        # The first line (presumably a header row) and blank lines are skipped.
        #
        # @return [Object] whatever the underlying importer's +store+ returns
        def import!
          # File.foreach closes the file handle as soon as iteration finishes
          # (or raises); a block-less File.open would leave it open until GC.
          File.foreach(@tsv_path).each_with_index do |line, index|
            next if index.zero? || line.blank?

            process_row(EntrezGenePathwayRow.new(line))
            puts "Processed #{index} records" if (index % PROGRESS_INTERVAL).zero?
          end
          @importer.store
        end

        private

        # Looks up both genes of a row and records their interaction.
        # Rows with an unknown gene id, and rows where both ids resolve to the
        # same gene, are skipped silently.
        #
        # @param row [EntrezGenePathwayRow] parsed TSV row
        def process_row(row)
          left_gene = @entrez_id_hash[row.entrez_gene_id]
          right_gene = @entrez_id_hash[row.interactant_gene_id]
          return unless left_gene && right_gene
          return if left_gene == right_gene

          record_interaction(left_gene, right_gene)
        end

        # Records the interaction symmetrically: one claim per direction.
        #
        # @param left_gene [Object] gene responding to +id+
        # @param right_gene [Object] gene responding to +id+
        def record_interaction(left_gene, right_gene)
          [
            { gene_id: left_gene.id, interacting_gene_id: right_gene.id },
            { gene_id: right_gene.id, interacting_gene_id: left_gene.id }
          ].each { |attrs| @importer.create_gene_gene_interaction_claim(attrs) }
        end

        # Fills the lookup table mapping each 'Entrez Gene Id' gene claim name
        # to the first gene associated with that claim.
        def preload_genes
          entrez_genes = DataModel::GeneClaim.joins(:genes).includes(:genes)
                                             .where(nomenclature: 'Entrez Gene Id')
          entrez_genes.each do |gene_claim|
            @entrez_id_hash[gene_claim.name] = gene_claim.genes.first
          end
        end

        # Memoized first source whose +source_db_name+ is 'Entrez'.
        # NOTE(review): +first+ yields nil when no such source exists; the nil
        # is then handed to the importer unchanged - confirm that is intended.
        def source
          @source ||= DataModel::Source.where(source_db_name: 'Entrez').first
        end
      end
    end
  end
end