1 // Copyright 2012 Google, Inc. All rights reserved.
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the LICENSE file in the root of the source
7 // Package bytediff provides a simple diff utility for looking at differences in byte
8 // slices. It's slow, clunky, and not particularly good by any measure, but
9 // it does provide very useful visualizations for diffs between small byte
12 // Our diff algorithm uses a dynamic programming implementation of longest common
13 // substring to find matching parts of slices, then recursively calls itself on
14 // the prefix/suffix of that matching part for each packet. This is a Bad Idea
15 // (tm) for normal (especially large) input, but for packets where large portions
16 // repeat frequently and we expect minor changes between results, it's actually
25 // OutputFormat tells a Differences.String call how to format the set of
26 // differences into a human-readable string. Its internals are currently
27 // unexported because we may want to change them drastically in the future. For
28 // the moment, please just use one of the provided OutputFormats that comes with
30 type OutputFormat struct {
31 start, finish, add, remove, change, reset string
35 // BashOutput uses bash escape sequences to color output.
36 BashOutput = &OutputFormat{
42 // HTMLOutput uses a <pre> to wrap output, and <span>s to color it.
43 // HTMLOutput is pretty experimental, so use at your own risk ;)
44 HTMLOutput = &OutputFormat{
48 remove: "<span style='color:red'>",
49 add: "<span style='color:green'>",
50 change: "<span style='color:yellow'>",
54 // longestCommonSubstring uses a O(MN) dynamic programming approach to find the
55 // longest common substring in a set of slices. It returns the index in each
56 // slice at which the substring begins, plus the length of the commonality.
57 func longestCommonSubstring(strA, strB []byte) (indexA, indexB, length int) {
58 lenA, lenB := len(strA), len(strB)
59 if lenA == 0 || lenB == 0 {
62 arr := make([][]int, lenA)
63 for i := 0; i < lenA; i++ {
64 arr[i] = make([]int, lenB)
68 for a := 0; a < lenA; a++ {
69 for b := 0; b < lenB; b++ {
70 if strA[a] == strB[b] {
73 length = arr[a-1][b-1] + 1
76 if length > maxLength {
85 for a >= 0 && b >= 0 && strA[a] == strB[b] {
95 func intMax(a, b int) int {
102 // Difference represents a single part of the data being diffed, containing
103 // information about both the original and new values.
104 // From and To are the sets of bytes in the original and the new byte slice.
105 // !Replace implies From == To (no change)
106 // len(To) == 0 implies From is being deleted
107 // len(From) == 0 implies To is being inserted
108 // else implies From is being replaced by To
109 type Difference struct {
114 // color returns the bash color for a given difference.
115 func (c *OutputFormat) color(d Difference) string {
119 case len(d.From) == 0:
128 // Diff diffs strA and strB, returning a list of differences which
129 // can be used to construct either the original or new string.
131 // Diff is optimized for comparing VERY SHORT slices. It's meant for comparing
132 // things like packets off the wire, not large files or the like.
133 // As such, its runtime can be catastrophic if large inputs are passed in.
134 // You've been warned.
135 func Diff(strA, strB []byte) Differences {
136 if len(strA) == 0 && len(strB) == 0 {
139 ia, ib, l := longestCommonSubstring(strA, strB)
142 Difference{true, strA, strB},
145 beforeA, match, afterA := strA[:ia], strA[ia:ia+l], strA[ia+l:]
146 beforeB, afterB := strB[:ib], strB[ib+l:]
147 var diffs Differences
148 diffs = append(diffs, Diff(beforeA, beforeB)...)
149 diffs = append(diffs, Difference{false, match, match})
150 diffs = append(diffs, Diff(afterA, afterB)...)
154 // Differences is a set of differences for a given diff'd pair of byte slices.
155 type Differences []Difference
157 // String outputs a previously diff'd set of strings, showing differences
158 // between them, highlighted by colors.
160 // The output format of this function is NOT guaranteed consistent, and may be
161 // changed at any time by the library authors. It's meant solely for human
163 func (c *OutputFormat) String(diffs Differences) string {
166 fmt.Fprintf(&buf, "%s", c.start)
167 fmt.Fprintf(&buf, "00000000 ")
168 for i := 0; i < len(diffs); i++ {
170 color := c.color(diff)
175 fmt.Fprint(&buf, color)
176 for _, b := range diff.From {
177 fmt.Fprintf(&buf, " %02x", b)
181 fmt.Fprintf(&buf, "%v\n%08x%v ", reset, count, color)
183 fmt.Fprintf(&buf, " ")
186 fmt.Fprint(&buf, reset)
188 fmt.Fprintf(&buf, "\n\n00000000 ")
190 for i := 0; i < len(diffs); i++ {
196 color := c.color(diff)
201 fmt.Fprint(&buf, color)
202 for _, b := range str {
203 fmt.Fprintf(&buf, " %02x", b)
207 fmt.Fprintf(&buf, "%v\n%08x%v ", reset, count, color)
209 fmt.Fprintf(&buf, " ")
212 fmt.Fprint(&buf, reset)
214 fmt.Fprint(&buf, "\n")
215 fmt.Fprintf(&buf, "%s", c.finish)
216 return string(buf.Bytes())