ioquatix/build-text

View on GitHub
lib/build/text/merge.rb

Summary

Maintainability
B
5 hrs
Test Coverage
#!/usr/bin/env ruby
# Copyright, 2013, by Samuel G. D. Williams. <http://www.codeotaku.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

module Build
    module Text
        module Merge
            Difference = Struct.new(:type, :value)
            
            def self.combine(old_text, new_text)
                lcs = lcs(old_text, new_text)
                changes = []
                
                n = 0; o = 0; l = 0
                while o < old_text.size and n < new_text.size and l < lcs.size
                    if !similar(old_text[o], lcs[l])
                        changes << Difference.new(:old, old_text[o])
                        o += 1
                    elsif !similar(new_text[n], lcs[l])
                        changes << Difference.new(:new, new_text[n])
                        n += 1
                    else
                        changes << Difference.new(:both, lcs[l])
                        o += 1; n += 1; l += 1
                    end
                end
                
                while o < old_text.size
                    changes << Difference.new(:old, old_text[o])
                    o += 1
                end
                
                while n < new_text.size
                    changes << Difference.new(:old, new_text[n])
                    n += 1
                end
                
                changes.map do |change|
                    change.value
                end
            end

            # This code is based directly on the Text gem implementation
            # Returns a value representing the "cost" of transforming str1 into str2
            def self.levenshtein_distance(s, t)
                n = s.length
                m = t.length

                return m if n == 0
                return n if m == 0

                d = (0..m).to_a
                x = nil

                n.times do |i|
                    e = i+1

                    m.times do |j|
                        cost = (s[i] == t[j]) ? 0 : 1
                        x = [
                            d[j+1] + 1, # insertion
                            e + 1,      # deletion
                            d[j] + cost # substitution
                        ].min
                        d[j] = e
                        e = x
                    end

                    d[m] = x
                end

                return x
            end

            # Calculate the similarity of two sequences, return true if they are with factor% similarity.
            def self.similar(s, t, factor = 0.15)
                return true if s == t
                
                distance = levenshtein_distance(s, t)
                average_length = (s.length + t.length) / 2.0

                proximity = (distance.to_f / average_length)
                
                return proximity <= factor
            end

            LCSNode = Struct.new(:value, :previous)
            
            # Find the Longest Common Subsequence in the given sequences x, y.
            def self.lcs(x, y)
                # Create the lcs matrix:
                m = Array.new(x.length + 1) do
                    Array.new(y.length + 1) do
                        LCSNode.new(nil, nil)
                    end
                end

                # LCS(i, 0) and LCS(0, j) are always 0:
                for i in 0..x.length do m[i][0].value = 0 end
                for j in 0..y.length do m[0][j].value = 0 end

                # Main algorithm, solve row by row:
                for i in 1..x.length do
                    for j in 1..y.length do
                        if similar(x[i-1], y[j-1])
                            # Value is based on maximizing the length of the matched strings:
                            m[i][j].value = m[i-1][j-1].value + (x[i-1].chomp.length + y[j-1].chomp.length) / 2.0
                            m[i][j].previous = [-1, -1]
                        else
                            if m[i-1][j].value >= m[i][j-1].value
                                m[i][j].value = m[i-1][j].value
                                m[i][j].previous = [-1, 0]
                            else
                                m[i][j].value = m[i][j-1].value
                                m[i][j].previous = [0, -1]
                            end
                        end
                    end
                end

                # Get the solution by following the path backwards from m[x.length][y.length]
                lcs = []
                
                i = x.length; j = y.length
                until m[i][j].previous == nil do
                    if m[i][j].previous == [-1, -1]
                        lcs << x[i-1]
                    end
                    
                    i, j = i + m[i][j].previous[0], j + m[i][j].previous[1]
                end

                return lcs.reverse!
            end
        end
    end
end