lib/biotcm/table.rb
module BioTCM
# One of the basic data models used in BioTCM to process
# {https://github.com/biotcm/biotcm/wiki/Formats#tab .tab files},
# developed under <b>"strict entry and tolerant exit"</b> philosophy.
#
# For more details, please refer to the test.
class Table
# Version
VERSION = '0.6.1'.freeze
# Primary key
attr_reader :primary_key
# Comments
attr_reader :comments
# @private
# Factory method: creates a table and stores the given internal
# structures in it as-is (no copying, no validation).
# @param primary_key [String, nil] name of the primary key column
# @param row_keys [Hash] row key => index of the row in content
# @param col_keys [Hash] column key => index of the cell in each row
# @param content [Array<Array>] cell values, one inner array per row
# @param comments [Array] comment lines
# @return [Table]
def self.build(primary_key: nil, row_keys: {}, col_keys: {}, content: [], comments: [])
  # A local variable is used on purpose: a class-level instance variable
  # would keep the most recently built table referenced from the class.
  tab = new
  {
    :@primary_key => primary_key,
    :@row_keys => row_keys,
    :@col_keys => col_keys,
    :@content => content,
    :@comments => comments
  }.each { |name, value| tab.instance_variable_set(name, value) }
  tab
end
# Load a table from a file
# @param filepath [String] path of the file to read
# @param encoding [String, Encoding] encoding used to read the file
# @param seperator [String] field separator passed on to String#to_table
# @return [Table]
# @raise [ArgumentError] if filepath is not a String
def self.load(filepath, encoding: Encoding.default_external, seperator: "\t")
  raise ArgumentError, 'Illegal argument type for Table.load' unless filepath.is_a?(String)
  # The block form closes the file handle as soon as the content is read
  File.open(filepath, "r:#{encoding}", &:read).to_table(seperator: seperator)
end
# Create an empty table with keys; every cell starts as an empty string
# @param primary_key [String]
# @param row_keys [Array]
# @param col_keys [Array]
# @param comments [Array]
def initialize(primary_key: nil, row_keys: [], col_keys: [], comments: [])
  @primary_key = primary_key
  @comments = comments
  # Keys are stored as key => position
  @row_keys = row_keys.each_with_index.to_h
  @col_keys = col_keys.each_with_index.to_h
  # One row per row key, one blank cell per column key
  @content = row_keys.map { col_keys.map { '' } }
end
# Clone this table
# The key maps, every content row and the comment list are copied one
# level deep; the cell and comment objects themselves are shared.
# @return [Table]
def clone
  copied_rows = @content.map(&:clone)
  copied_comments = @comments.clone
  self.class.build(
    primary_key: @primary_key,
    row_keys: @row_keys.clone,
    col_keys: @col_keys.clone,
    content: copied_rows,
    comments: copied_comments
  )
end
# Set the primary key
# @param val [String, nil] nil clears the key; anything else is stored
#   as its #to_s representation
def primary_key=(val)
  @primary_key = (val.to_s unless val.nil?)
end
# Get row keys
# @return [Array] a fresh array holding the row keys in insertion order
def row_keys
  @row_keys.each_key.to_a
end
# Set row keys
# Relabels the existing rows: the i-th key in val names the i-th row.
# @param val [Array] exactly one unique key per existing row
# @raise [ArgumentError] if val is not an Array, if its size differs from
#   the number of rows, or if it contains duplicated keys
def row_keys=(val)
  raise ArgumentError, 'Illegal agrument type' unless val.is_a?(Array)
  # Surplus keys would point at rows that do not exist in the content
  raise ArgumentError, 'Unmatched size' if val.size != @row_keys.size
  # Duplicates would collapse in the hash and leave rows without a key
  raise ArgumentError, 'Duplicated row keys' if val.uniq.size != val.size
  @row_keys = val.each_with_index.to_h
end
# Get col keys
# @return [Array] a fresh array holding the column keys in insertion order
def col_keys
  @col_keys.each_key.to_a
end
# Set col keys
# Relabels the existing columns: the i-th key in val names the i-th column.
# @param val [Array] exactly one unique key per existing column
# @raise [ArgumentError] if val is not an Array, if its size differs from
#   the number of columns, or if it contains duplicated keys
def col_keys=(val)
  raise ArgumentError, 'Illegal agrument type' unless val.is_a?(Array)
  # Surplus keys would point at cells that do not exist in the content rows
  raise ArgumentError, 'Unmatched size' if val.size != @col_keys.size
  # Duplicates would collapse in the hash and leave columns without a key
  raise ArgumentError, 'Duplicated column keys' if val.uniq.size != val.size
  @col_keys = val.each_with_index.to_h
end
# Set comments
# A collection (anything responding to #collect) is stored element-wise
# as strings; any other value becomes a single comment line.
# @param val [Array/String]
def comments=(val)
  return @comments = val.map(&:to_s) if val.respond_to?(:collect)

  @comments = [val.to_s] if val.respond_to?(:to_s)
end
# Access an element; reads when val is nil, writes otherwise
# @overload ele(row, col)
# Get an element
# @param row [String]
# @param col [String]
# @return [String]
# @overload ele(row, col, val)
# Set an element
# @param row [String]
# @param col [String]
# @param val [String]
# @return [Table]
def ele(row, col, val = nil)
  return get_ele(row, col) if val.nil?

  set_ele(row, col, val)
end
# Get an element
# @param row [String]
# @param col [String]
# @return [String] the stored cell, or nil when either key is unknown
def get_ele(row, col)
  row_index = @row_keys[row]
  col_index = @col_keys[col]
  return nil unless row_index && col_index

  @content[row_index][col_index]
end
# Set an element, creating the row and/or column when they are unknown
# @param row [String]
# @param col [String]
# @param val [String] stored as val.to_s
# @return [Table]
# @raise [ArgumentError] if row or col is not a String
def set_ele(row, col, val)
  acceptable = row.is_a?(String) && col.is_a?(String) && val.respond_to?(:to_s)
  raise ArgumentError, 'Illegal argument type' unless acceptable

  # Grow the table with blank cells first when a key is new
  set_row(row, [''] * @col_keys.size) unless @row_keys[row]
  set_col(col, [''] * @row_keys.size) unless @col_keys[col]
  @content[@row_keys[row]][@col_keys[col]] = val.to_s
  self
end
# Access a row; reads when val is nil, writes otherwise
# @overload row(row)
# Get a row
# @param row [String]
# @return [Hash]
# @overload row(row, val)
# Set a row
# @param row [String]
# @param val [Hash, Array]
# @return [Table]
def row(row, val = nil)
  return get_row(row) if val.nil?

  set_row(row, val)
end
# Get a row
# @param row [String]
# @return [Hash] column key => cell, or nil when the row key is unknown
def get_row(row)
  row_index = @row_keys[row]
  return nil if row_index.nil?

  @col_keys.each_with_object({}) do |(col_key, col_index), cells|
    cells[col_key] = @content[row_index][col_index]
  end
end
# Set a row, appending it when the row key is unknown
# @param row [String]
# @param val [Hash, Array] an Array supplies every cell in column order;
#   a Hash maps column keys to cells (unknown column keys are ignored and
#   unmentioned cells keep their value, or are blank for a new row)
# @return [Table]
# @raise [ArgumentError] if row is not a String, val is neither a Hash nor
#   an Array, or an Array val does not have one cell per column
def set_row(row, val)
  if !row.is_a?(String) || (!val.is_a?(Hash) && !val.is_a?(Array))
    raise ArgumentError, 'Illegal argument type'
  elsif val.is_a?(Array) && val.size != @col_keys.size
    raise ArgumentError, 'Column size not match'
  end

  row_index = @row_keys[row]
  if row_index.nil?
    row_index = @row_keys[row] = @row_keys.size
    # Every blank cell is its own String so cells never alias each other
    @content << Array.new(@col_keys.size) { '' }
  end

  case val
  when Array
    # Store a copy: the table appends to its rows when columns are added,
    # which must not leak into the caller's array
    @content[row_index] = val.dup
  when Hash
    val.each do |col_key, cell|
      col_index = @col_keys[col_key]
      @content[row_index][col_index] = cell if col_index
    end
  end
  self
end
# Access a column; reads when val is nil, writes otherwise
# @overload col(col)
# Get a column
# @param col [String]
# @return [Hash]
# @overload col(col, val)
# Set a column
# @param col [String]
# @param val [Hash, Array]
# @return [Table]
def col(col, val = nil)
  return get_col(col) if val.nil?

  set_col(col, val)
end
# Get a column
# @param col [String]
# @return [Hash] row key => cell, or nil when the column key is unknown
def get_col(col)
  col_index = @col_keys[col]
  return nil if col_index.nil?

  @row_keys.each_with_object({}) do |(row_key, row_index), cells|
    cells[row_key] = @content[row_index][col_index]
  end
end
# Set a column, appending it when the column key is unknown
# @param col [String]
# @param val [Hash, Array] an Array supplies one cell per row in row
#   order; a Hash maps row keys to cells (unknown row keys are ignored)
# @return [Table]
def set_col(col, val)
  unless col.is_a?(String) && (val.is_a?(Hash) || val.is_a?(Array))
    raise ArgumentError, 'Illegal argument type'
  end
  raise ArgumentError, 'Row size not match' if val.is_a?(Array) && val.size != row_keys.size

  col_index = @col_keys[col]
  if val.is_a?(Array)
    if col_index
      # Overwrite the existing column cell by cell
      val.each_with_index { |cell, row_index| @content[row_index][col_index] = cell }
    else
      # Register the key, then extend every row by one cell
      @col_keys[col] = @col_keys.size
      val.each_with_index { |cell, row_index| @content[row_index] << cell }
    end
  else
    unless col_index
      col_index = @col_keys[col] = @col_keys.size
      @content.each { |cells| cells << '' }
    end
    val.each do |row_key, cell|
      row_index = @row_keys[row_key]
      @content[row_index][col_index] = cell if row_index
    end
  end
  self
end
# Iterate by row
# Yields each row key together with the result of row(key).
# @return [Table, Enumerator] self when a block is given, otherwise an
#   Enumerator over [row key, row] pairs
def each_row
  unless block_given?
    return Enumerator.new do |yielder|
      @row_keys.each_key { |key| yielder << [key, row(key)] }
    end
  end

  @row_keys.each_key { |key| yield(key, row(key)) }
  self
end
# Iterate by col
# Yields each column key together with the result of col(key).
# @return [Table, Enumerator] self when a block is given, otherwise an
#   Enumerator over [column key, column] pairs
def each_col
  unless block_given?
    return Enumerator.new do |yielder|
      @col_keys.each_key { |key| yielder << [key, col(key)] }
    end
  end

  @col_keys.each_key { |key| yield(key, col(key)) }
  self
end
# Select row(s) to build a new table; every column is kept
# @param rows [Array] row keys to keep
# @return [Table]
def select_row(rows)
  every_column = :all
  select(rows, every_column)
end
# Select column(s) to build a new table; every row is kept
# @param cols [Array] column keys to keep
# @return [Table]
def select_col(cols)
  every_row = :all
  select(every_row, cols)
end
# Select row(s) and column(s) to build a new table
# Keys are kept in the order given; unknown and repeated keys are dropped.
# The new table gets its own row arrays and comment list, so structural
# edits to it do not reach back into this table.
# @param rows [Array, Symbol] row keys to keep, or :all
# @param cols [Array, Symbol] column keys to keep, or :all
# @return [Table]
# @raise [ArgumentError] if rows or cols is neither :all nor an Array
def select(rows, cols)
  # Prune rows
  if rows == :all
    row_keys = @row_keys.clone
    content = @content.map(&:clone)
  else
    raise ArgumentError, 'Illegal argument type' unless rows.is_a?(Array)

    kept_rows = rows & @row_keys.keys
    row_keys = kept_rows.each_with_index.to_h
    # Clone each row so the result never shares row arrays with this table
    content = kept_rows.map { |row| @content[@row_keys[row]].clone }
  end

  # Prune columns
  if cols == :all
    col_keys = @col_keys.clone
  else
    raise ArgumentError, 'Illegal argument type' unless cols.is_a?(Array)

    kept_cols = cols & @col_keys.keys
    col_keys = kept_cols.each_with_index.to_h
    old_indices = kept_cols.map { |col| @col_keys[col] }
    content = content.map { |cells| cells.values_at(*old_indices) }
  end

  # Create a new table
  self.class.build(
    primary_key: @primary_key,
    row_keys: row_keys,
    col_keys: col_keys,
    content: content,
    comments: @comments.dup
  )
end
# Merge with another table
# The result holds the union of both tables' row and column keys (this
# table's keys first). Cells present in neither table are empty strings;
# where both tables define a cell, the value from tab wins.
# @param tab [Table]
# @return [Table] a new table; neither operand is modified
# @raise [ArgumentError] if tab is not a table or the primary keys differ
def merge(tab)
  raise ArgumentError, 'Only tables could be merged' unless tab.is_a?(self.class)
  raise ArgumentError, 'Primary keys not the same' unless tab.primary_key == @primary_key

  # Empty content; every blank cell is its own String
  row_keys = (@row_keys.keys | tab.row_keys).each_with_index.to_h
  col_keys = (@col_keys.keys | tab.col_keys).each_with_index.to_h
  content = Array.new(row_keys.size) { Array.new(col_keys.size) { '' } }

  # Fill content with self
  @row_keys.each do |row, old_ri|
    target = content[row_keys[row]]
    @col_keys.each { |col, old_ci| target[col_keys[col]] = @content[old_ri][old_ci] }
  end

  # Fill content with tab (applied last, so it overrides overlapping cells).
  # The position of a key in tab.row_keys / tab.col_keys is its index in
  # tab's content.
  tab_content = tab.instance_variable_get(:@content)
  tab_cols = tab.col_keys
  tab.row_keys.each_with_index do |row, old_ri|
    target = content[row_keys[row]]
    tab_cols.each_with_index { |col, old_ci| target[col_keys[col]] = tab_content[old_ri][old_ci] }
  end

  # Create a new table
  self.class.build(
    primary_key: @primary_key,
    row_keys: row_keys,
    col_keys: col_keys,
    content: content,
    comments: @comments + tab.comments
  )
end
# @private
# For inspection
def inspect
'#<Table primary_key=' + @primary_key.inspect +
' col_keys=' + @col_keys.keys.sort_by { |k| @col_keys[k] }.inspect +
' row_keys=' + @row_keys.keys.sort_by { |k| @row_keys[k] }.inspect +
' content=' + @content.inspect +
' comments=' + @comments.join.inspect +
'>'
end
# @private
# Convert to String
def to_s
[
@comments.collect { |line| '# ' + line },
@primary_key.nil? ? @col_keys.keys.join("\t") : [@primary_key, @col_keys.keys].join("\t"),
@row_keys.keys.zip(@content).collect { |a| a.join("\t") }
].flatten.join("\n")
end
# Print in a file
# Writes the string form of the table followed by a newline, replacing
# any existing content of the file.
# @param filepath [String]
# @return [self]
def save(filepath)
  # The block form flushes and closes the handle before returning, so the
  # file is complete on disk as soon as this method is done
  File.open(filepath, 'w') { |file| file.puts self }
  self
end
# {Table}'s extension to core classes
module Extensions
# {Table}'s extension to String
module String
# Create a {BioTCM::Table} based on a String
# Leading lines that start with "# " are taken as comments, the next line
# is the headline and every remaining line is one row whose first field is
# the row key.
# @param seperator [String] field separator
# @return [BioTCM::Table]
# @raise [ArgumentError] if there is no headline, column names or row keys
#   are duplicated, or a row has the wrong number of fields
def to_table(seperator: "\t")
  stuff = split(/\r\n|\n/)

  # Comments: only lines that START with "# " count; a "# " in the middle
  # of a line is ordinary data
  comments = []
  while stuff.first && stuff.first.start_with?('# ')
    # Some tables' head lines start with a '#', such as *mim2gene.txt* in OMIM
    break if stuff.first =~ /\A# [\w ]+\t/

    comments << stuff.shift.sub(/\A# /, '')
  end

  # Headline
  raise ArgumentError, 'Empty table' if stuff.empty?

  col_keys = stuff.shift.split(seperator)
  raise ArgumentError, 'Duplicated column names' unless col_keys.uniq!.nil?

  # When the first row has one field more than the headline, the headline
  # carries no primary key name; otherwise its first field is that name
  first_row = stuff.first
  keyless_headline = first_row && first_row.split(seperator, -1).size == col_keys.size + 1
  primary_key = keyless_headline ? nil : col_keys.shift
  col_keys = col_keys.each_with_index.to_h

  # Table content
  # 1-based line number of the first row: comment lines, the headline, + 1
  first_row_line = comments.size + 2
  row_keys = {}
  content = []
  stuff.each_with_index do |line, line_index|
    cells = line.split(seperator, -1)
    if cells.size != col_keys.size + 1
      raise ArgumentError, "Row size inconsistent in line #{line_index + first_row_line}"
    elsif row_keys[cells[0]]
      raise ArgumentError, "Duplicated primary key: #{cells[0]}"
    end

    row_keys[cells.shift] = row_keys.size
    content << cells
  end

  # Build a table to return
  BioTCM::Table.build(
    primary_key: primary_key,
    row_keys: row_keys,
    col_keys: col_keys,
    content: content,
    comments: comments
  )
end
end
end
end
end
String.include(BioTCM::Table::Extensions::String)