# discourse/discourse

lib/onpdiff.rb

### Summary

D
3 days
###### Test Coverage
# frozen_string_literal: true

# Implements "An O(NP) Sequence Comparison Algorithm" as described by Sun Wu, Udi Manber, Gene Myers
# and Webb Miller in https://publications.mpi-cbg.de/Wu_1990_6334.pdf

# Computes an edit script between two sequences `a` and `b`.
#
# Both sequences only need to respond to `size` and `[]` (Strings and Arrays
# both work); elements are compared with `==`.
#
# Every edit script is an Array of `[element, op]` pairs where `op` is one of
# `:common`, `:add` (present only in `b`) or `:delete` (present only in `a`).
class ONPDiff
  # Maximum number of search rounds `compose` performs before giving up.
  # When the limit is reached the shortest path is empty, so every public
  # diff method returns an empty array.
  MAX_SEARCH_ROUNDS = 1000

  # @param a [#size, #[]] the "before" sequence
  # @param b [#size, #[]] the "after" sequence
  def initialize(a, b)
    @a, @b = a, b
    @m, @n = a.size, b.size
    @backtrack = []

    # The search requires @a to be the shorter sequence. When the inputs are
    # swapped, @reverse makes the script builders invert :add and :delete.
    @reverse = @m > @n
    if @reverse
      @a, @b = @b, @a
      @m, @n = @n, @m
    end

    @offset = @m + 1 # shifts a diagonal number k to a non-negative array index
    @delta = @n - @m # diagonal on which the end point (@m, @n) lies
  end

  # @return [Array<Array>] one `[element, op]` pair per element (memoized)
  def diff
    @diff ||= build_edit_script(compose)
  end

  # Same as #diff, but consecutive elements sharing an operation are merged
  # into a single entry by appending them with `<<`.
  #
  # @return [Array<Array>] `[merged_elements, op]` pairs (memoized)
  def short_diff
    @short_diff ||= build_short_edit_script(compose)
  end

  # Same as #diff, but a run of one operation directly followed by a run of
  # the opposite operation is interleaved so that changed elements end up
  # next to each other.
  #
  # @return [Array<Array>] `[element, op]` pairs (memoized)
  def paragraph_diff
    @paragraph_diff ||= build_paragraph_edit_script(diff)
  end

  private

  # Searches for the shortest edit path from (0, 0) to (@m, @n).
  #
  # @return [Array<Array(Integer, Integer)>] the `[x, y]` end points of the
  #   followed diagonals, ordered from the end of the sequences back to the
  #   start; empty when MAX_SEARCH_ROUNDS is reached.
  def compose
    return @shortest_path if @shortest_path

    size = @m + @n + 3
    fp = Array.new(size, -1)       # furthest y reached on each diagonal
    @path = Array.new(size, -1)    # index into @backtrack of each diagonal's last point
    round = 0

    loop do
      return (@shortest_path = []) if round >= MAX_SEARCH_ROUNDS

      # Diagonals below delta (ascending), then above delta (descending), then
      # delta itself: every diagonal is extended after the neighbours it reads.
      (-round).upto(@delta - 1) { |k| fp[k + @offset] = extend_diagonal(fp, k) }
      (@delta + round).downto(@delta + 1) { |k| fp[k + @offset] = extend_diagonal(fp, k) }
      fp[@delta + @offset] = extend_diagonal(fp, @delta)

      break if fp[@delta + @offset] == @n
      round += 1
    end

    # Follow the predecessor links from the end point back to the start.
    @shortest_path = []
    r = @path[@delta + @offset]
    until r == -1
      x, y, r = @backtrack[r]
      @shortest_path << [x, y]
    end

    @shortest_path
  end

  # Extends diagonal k from the better of its two neighbouring diagonals.
  def extend_diagonal(fp, k)
    snake(k, fp[k - 1 + @offset] + 1, fp[k + 1 + @offset])
  end

  # Follows diagonal k for as long as the elements of both sequences match,
  # starting from the larger of the candidate positions `p` and `pp`.
  # The reached point is appended to @backtrack together with the index of
  # the point it was reached from.
  #
  # @return [Integer] the furthest y reached on diagonal k
  def snake(k, p, pp)
    r = p > pp ? @path[k - 1 + @offset] : @path[k + 1 + @offset]
    y = [p, pp].max
    x = y - k

    while x < @m && y < @n && @a[x] == @b[y]
      x += 1
      y += 1
    end

    @path[k + @offset] = @backtrack.size
    @backtrack << [x, y, r]

    y
  end

  # Walks the shortest path from the start of both sequences and yields every
  # edit as `element, op`.
  def each_edit(shortest_path)
    px = py = 0
    shortest_path.reverse_each do |x, y|
      target_diagonal = y - x
      while px < x || py < y
        current_diagonal = py - px
        if target_diagonal > current_diagonal
          # Element only in @b: an addition, unless the inputs were swapped.
          yield @b[py], (@reverse ? :delete : :add)
          py += 1
        elsif target_diagonal < current_diagonal
          # Element only in @a: a deletion, unless the inputs were swapped.
          yield @a[px], (@reverse ? :add : :delete)
          px += 1
        else
          yield @a[px], :common
          px += 1
          py += 1
        end
      end
    end
  end

  # Builds the edit script with one entry per element.
  def build_edit_script(shortest_path)
    ses = []
    each_edit(shortest_path) { |element, op| ses << [element, op] }
    ses
  end

  # Builds the edit script with consecutive same-operation elements merged.
  def build_short_edit_script(shortest_path)
    ses = []
    each_edit(shortest_path) do |element, op|
      if ses.last && ses.last[1] == op
        ses.last[0] << element
      else
        # dup: later elements are appended to this one in place, which must
        # not modify (or fail on frozen) elements owned by the caller.
        ses << [element.dup, op]
      end
    end
    ses
  end

  # Re-orders an element-level edit script so that a run of one operation and
  # the run of the opposite operation directly following it are interleaved.
  #
  # Pairing only happens when the following run has more than one entry.
  # When the first run is longer, its surplus entries are emitted unpaired
  # before the pairs; when the second run is longer, its surplus entries are
  # left for the next iteration of the loop.
  def build_paragraph_edit_script(ses)
    paragraph_ses = []
    i = 0

    while i < ses.size
      op_code = ses[i][1]

      if op_code == :common
        paragraph_ses << ses[i]
        i += 1
        next
      end

      opposite_op_code = op_code == :add ? :delete : :add

      # Last index of the run of op_code starting at i.
      run_end = i
      run_end += 1 while run_end + 1 < ses.size && ses[run_end + 1][1] == op_code

      # Last index of the opposite run that follows (== run_end when there is none).
      opposite_end = run_end
      opposite_end += 1 while opposite_end + 1 < ses.size && ses[opposite_end + 1][1] == opposite_op_code

      num_before = run_end - i + 1
      num_after = opposite_end - run_end

      if num_after > 1
        if num_before > num_after
          surplus = num_before - num_after
          paragraph_ses.concat(ses[i, surplus])
          i += surplus
        elsif num_after > num_before
          opposite_end -= num_after - num_before
        end
        paragraph_ses.concat(pair_paragraphs(ses, i, run_end))
      else
        paragraph_ses.concat(ses[i..opposite_end])
      end

      i = opposite_end + 1
    end

    paragraph_ses
  end

  # Interleaves the entries ses[i..j] with the equally long run that directly
  # follows them: first of the run, first of the next run, second, second, ...
  def pair_paragraphs(ses, i, j)
    num_pairs = j - i + 1
    (i..j).flat_map { |index| [ses[index], ses[index + num_pairs]] }
  end
end