Text_Diff_Engine_native

Class used internally by Text_Diff to actually compute the diffs.

Defined (1)

The class is defined in the following location(s).

/wp-includes/Text/Diff/Engine/native.php  
  1. class Text_Diff_Engine_native { 
  2.  
  3. function diff($from_lines, $to_lines) 
  4. array_walk($from_lines, array('Text_Diff', 'trimNewlines')); 
  5. array_walk($to_lines, array('Text_Diff', 'trimNewlines')); 
  6.  
  7. $n_from = count($from_lines); 
  8. $n_to = count($to_lines); 
  9.  
  10. $this->xchanged = $this->ychanged = array(); 
  11. $this->xv = $this->yv = array(); 
  12. $this->xind = $this->yind = array(); 
  13. unset($this->seq); 
  14. unset($this->in_seq); 
  15. unset($this->lcs); 
  16.  
  17. // Skip leading common lines. 
  18. for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) { 
  19. if ($from_lines[$skip] !== $to_lines[$skip]) { 
  20. break; 
  21. $this->xchanged[$skip] = $this->ychanged[$skip] = false; 
  22.  
  23. // Skip trailing common lines. 
  24. $xi = $n_from; $yi = $n_to; 
  25. for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) { 
  26. if ($from_lines[$xi] !== $to_lines[$yi]) { 
  27. break; 
  28. $this->xchanged[$xi] = $this->ychanged[$yi] = false; 
  29.  
  30. // Ignore lines which do not exist in both files. 
  31. for ($xi = $skip; $xi < $n_from - $endskip; $xi++) { 
  32. $xhash[$from_lines[$xi]] = 1; 
  33. for ($yi = $skip; $yi < $n_to - $endskip; $yi++) { 
  34. $line = $to_lines[$yi]; 
  35. if (($this->ychanged[$yi] = empty($xhash[$line]))) { 
  36. continue; 
  37. $yhash[$line] = 1; 
  38. $this->yv[] = $line; 
  39. $this->yind[] = $yi; 
  40. for ($xi = $skip; $xi < $n_from - $endskip; $xi++) { 
  41. $line = $from_lines[$xi]; 
  42. if (($this->xchanged[$xi] = empty($yhash[$line]))) { 
  43. continue; 
  44. $this->xv[] = $line; 
  45. $this->xind[] = $xi; 
  46.  
  47. // Find the LCS. 
  48. $this->_compareseq(0, count($this->xv), 0, count($this->yv)); 
  49.  
  50. // Merge edits when possible. 
  51. $this->_shiftBoundaries($from_lines, $this->xchanged, $this->ychanged); 
  52. $this->_shiftBoundaries($to_lines, $this->ychanged, $this->xchanged); 
  53.  
  54. // Compute the edit operations. 
  55. $edits = array(); 
  56. $xi = $yi = 0; 
  57. while ($xi < $n_from || $yi < $n_to) { 
  58. assert($yi < $n_to || $this->xchanged[$xi]); 
  59. assert($xi < $n_from || $this->ychanged[$yi]); 
  60.  
  61. // Skip matching "snake". 
  62. $copy = array(); 
  63. while ($xi < $n_from && $yi < $n_to 
  64. && !$this->xchanged[$xi] && !$this->ychanged[$yi]) { 
  65. $copy[] = $from_lines[$xi++]; 
  66. ++$yi; 
  67. if ($copy) { 
  68. $edits[] = new Text_Diff_Op_copy($copy); 
  69.  
  70. // Find deletes & adds. 
  71. $delete = array(); 
  72. while ($xi < $n_from && $this->xchanged[$xi]) { 
  73. $delete[] = $from_lines[$xi++]; 
  74.  
  75. $add = array(); 
  76. while ($yi < $n_to && $this->ychanged[$yi]) { 
  77. $add[] = $to_lines[$yi++]; 
  78.  
  79. if ($delete && $add) { 
  80. $edits[] = new Text_Diff_Op_change($delete, $add); 
  81. } elseif ($delete) { 
  82. $edits[] = new Text_Diff_Op_delete($delete); 
  83. } elseif ($add) { 
  84. $edits[] = new Text_Diff_Op_add($add); 
  85.  
  86. return $edits; 
  87.  
  88. /** 
  89. * Divides the Largest Common Subsequence (LCS) of the sequences (XOFF,  
  90. * XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized 
  91. * segments. 
  92. * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an array of 
  93. * NCHUNKS+1 (X, Y) indexes giving the diving points between sub 
  94. * sequences. The first sub-sequence is contained in (X0, X1), (Y0, Y1),  
  95. * the second in (X1, X2), (Y1, Y2) and so on. Note that (X0, Y0) == 
  96. * (XOFF, YOFF) and (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM). 
  97. * This function assumes that the first lines of the specified portions of 
  98. * the two files do not match, and likewise that the last lines do not 
  99. * match. The caller must trim matching lines from the beginning and end 
  100. * of the portions it is going to specify. 
  101. */ 
  102. function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) 
  103. $flip = false; 
  104.  
  105. if ($xlim - $xoff > $ylim - $yoff) { 
  106. /** Things seems faster (I'm not sure I understand why) when the 
  107. * shortest sequence is in X. */ 
  108. $flip = true; 
  109. list ($xoff, $xlim, $yoff, $ylim) 
  110. = array($yoff, $ylim, $xoff, $xlim); 
  111.  
  112. if ($flip) { 
  113. for ($i = $ylim - 1; $i >= $yoff; $i--) { 
  114. $ymatches[$this->xv[$i]][] = $i; 
  115. } else { 
  116. for ($i = $ylim - 1; $i >= $yoff; $i--) { 
  117. $ymatches[$this->yv[$i]][] = $i; 
  118.  
  119. $this->lcs = 0; 
  120. $this->seq[0]= $yoff - 1; 
  121. $this->in_seq = array(); 
  122. $ymids[0] = array(); 
  123.  
  124. $numer = $xlim - $xoff + $nchunks - 1; 
  125. $x = $xoff; 
  126. for ($chunk = 0; $chunk < $nchunks; $chunk++) { 
  127. if ($chunk > 0) { 
  128. for ($i = 0; $i <= $this->lcs; $i++) { 
  129. $ymids[$i][$chunk - 1] = $this->seq[$i]; 
  130.  
  131. $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $chunk) / $nchunks); 
  132. for (; $x < $x1; $x++) { 
  133. $line = $flip ? $this->yv[$x] : $this->xv[$x]; 
  134. if (empty($ymatches[$line])) { 
  135. continue; 
  136. $matches = $ymatches[$line]; 
  137. reset($matches); 
  138. while (list(, $y) = each($matches)) { 
  139. if (empty($this->in_seq[$y])) { 
  140. $k = $this->_lcsPos($y); 
  141. assert($k > 0); 
  142. $ymids[$k] = $ymids[$k - 1]; 
  143. break; 
  144. while (list(, $y) = each($matches)) { 
  145. if ($y > $this->seq[$k - 1]) { 
  146. assert($y <= $this->seq[$k]); 
  147. /** Optimization: this is a common case: next match is 
  148. * just replacing previous match. */ 
  149. $this->in_seq[$this->seq[$k]] = false; 
  150. $this->seq[$k] = $y; 
  151. $this->in_seq[$y] = 1; 
  152. } elseif (empty($this->in_seq[$y])) { 
  153. $k = $this->_lcsPos($y); 
  154. assert($k > 0); 
  155. $ymids[$k] = $ymids[$k - 1]; 
  156.  
  157. $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff); 
  158. $ymid = $ymids[$this->lcs]; 
  159. for ($n = 0; $n < $nchunks - 1; $n++) { 
  160. $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks); 
  161. $y1 = $ymid[$n] + 1; 
  162. $seps[] = $flip ? array($y1, $x1) : array($x1, $y1); 
  163. $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim); 
  164.  
  165. return array($this->lcs, $seps); 
  166.  
  167. function _lcsPos($ypos) 
  168. $end = $this->lcs; 
  169. if ($end == 0 || $ypos > $this->seq[$end]) { 
  170. $this->seq[++$this->lcs] = $ypos; 
  171. $this->in_seq[$ypos] = 1; 
  172. return $this->lcs; 
  173.  
  174. $beg = 1; 
  175. while ($beg < $end) { 
  176. $mid = (int)(($beg + $end) / 2); 
  177. if ($ypos > $this->seq[$mid]) { 
  178. $beg = $mid + 1; 
  179. } else { 
  180. $end = $mid; 
  181.  
  182. assert($ypos != $this->seq[$end]); 
  183.  
  184. $this->in_seq[$this->seq[$end]] = false; 
  185. $this->seq[$end] = $ypos; 
  186. $this->in_seq[$ypos] = 1; 
  187. return $end; 
  188.  
  189. /** 
  190. * Finds LCS of two sequences. 
  191. * The results are recorded in the vectors $this->{x, y}changed[], by 
  192. * storing a 1 in the element for each line that is an insertion or 
  193. * deletion (ie. is not in the LCS). 
  194. * The subsequence of file 0 is (XOFF, XLIM) and likewise for file 1. 
  195. * Note that XLIM, YLIM are exclusive bounds. All line numbers are 
  196. * origin-0 and discarded lines are not counted. 
  197. */ 
  198. function _compareseq ($xoff, $xlim, $yoff, $ylim) 
  199. /** Slide down the bottom initial diagonal. */ 
  200. while ($xoff < $xlim && $yoff < $ylim 
  201. && $this->xv[$xoff] == $this->yv[$yoff]) { 
  202. ++$xoff; 
  203. ++$yoff; 
  204.  
  205. /** Slide up the top initial diagonal. */ 
  206. while ($xlim > $xoff && $ylim > $yoff 
  207. && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) { 
  208. --$xlim; 
  209. --$ylim; 
  210.  
  211. if ($xoff == $xlim || $yoff == $ylim) { 
  212. $lcs = 0; 
  213. } else { 
  214. /** This is ad hoc but seems to work well. $nchunks = 
  215. * sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); $nchunks = 
  216. * max(2, min(8, (int)$nchunks)); */ 
  217. $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1; 
  218. list($lcs, $seps) 
  219. = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks); 
  220.  
  221. if ($lcs == 0) { 
  222. /** X and Y sequences have no common subsequence: mark all 
  223. * changed. */ 
  224. while ($yoff < $ylim) { 
  225. $this->ychanged[$this->yind[$yoff++]] = 1; 
  226. while ($xoff < $xlim) { 
  227. $this->xchanged[$this->xind[$xoff++]] = 1; 
  228. } else { 
  229. /** Use the partitions to split this problem into subproblems. */ 
  230. reset($seps); 
  231. $pt1 = $seps[0]; 
  232. while ($pt2 = next($seps)) { 
  233. $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]); 
  234. $pt1 = $pt2; 
  235.  
  236. /** 
  237. * Adjusts inserts/deletes of identical lines to join changes as much as 
  238. * possible. 
  239. * We do something when a run of changed lines include a line at one end 
  240. * and has an excluded, identical line at the other. We are free to 
  241. * choose which identical line is included. `compareseq' usually chooses 
  242. * the one at the beginning, but usually it is cleaner to consider the 
  243. * following identical line to be the "change". 
  244. * This is extracted verbatim from analyze.c (GNU diffutils-2.7). 
  245. */ 
  246. function _shiftBoundaries($lines, &$changed, $other_changed) 
  247. $i = 0; 
  248. $j = 0; 
  249.  
  250. assert('count($lines) == count($changed)'); 
  251. $len = count($lines); 
  252. $other_len = count($other_changed); 
  253.  
  254. while (1) { 
  255. /** Scan forward to find the beginning of another run of 
  256. * changes. Also keep track of the corresponding point in the 
  257. * other file. 
  258. * Throughout this code, $i and $j are adjusted together so that 
  259. * the first $i elements of $changed and the first $j elements of 
  260. * $other_changed both contain the same number of zeros (unchanged 
  261. * lines). 
  262. * Furthermore, $j is always kept so that $j == $other_len or 
  263. * $other_changed[$j] == false. */ 
  264. while ($j < $other_len && $other_changed[$j]) { 
  265. $j++; 
  266.  
  267. while ($i < $len && ! $changed[$i]) { 
  268. assert('$j < $other_len && ! $other_changed[$j]'); 
  269. $i++; $j++; 
  270. while ($j < $other_len && $other_changed[$j]) { 
  271. $j++; 
  272.  
  273. if ($i == $len) { 
  274. break; 
  275.  
  276. $start = $i; 
  277.  
  278. /** Find the end of this run of changes. */ 
  279. while (++$i < $len && $changed[$i]) { 
  280. continue; 
  281.  
  282. do { 
  283. /** Record the length of this run of changes, so that we can 
  284. * later determine whether the run has grown. */ 
  285. $runlength = $i - $start; 
  286.  
  287. /** Move the changed region back, so long as the previous 
  288. * unchanged line matches the last changed one. This merges 
  289. * with previous changed regions. */ 
  290. while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) { 
  291. $changed[--$start] = 1; 
  292. $changed[--$i] = false; 
  293. while ($start > 0 && $changed[$start - 1]) { 
  294. $start--; 
  295. assert('$j > 0'); 
  296. while ($other_changed[--$j]) { 
  297. continue; 
  298. assert('$j >= 0 && !$other_changed[$j]'); 
  299.  
  300. /** Set CORRESPONDING to the end of the changed run, at the 
  301. * last point where it corresponds to a changed run in the 
  302. * other file. CORRESPONDING == LEN means no such point has 
  303. * been found. */ 
  304. $corresponding = $j < $other_len ? $i : $len; 
  305.  
  306. /** Move the changed region forward, so long as the first 
  307. * changed line matches the following unchanged one. This 
  308. * merges with following changed regions. Do this second, so 
  309. * that if there are no merges, the changed region is moved 
  310. * forward as far as possible. */ 
  311. while ($i < $len && $lines[$start] == $lines[$i]) { 
  312. $changed[$start++] = false; 
  313. $changed[$i++] = 1; 
  314. while ($i < $len && $changed[$i]) { 
  315. $i++; 
  316.  
  317. assert('$j < $other_len && ! $other_changed[$j]'); 
  318. $j++; 
  319. if ($j < $other_len && $other_changed[$j]) { 
  320. $corresponding = $i; 
  321. while ($j < $other_len && $other_changed[$j]) { 
  322. $j++; 
  323. } while ($runlength != $i - $start); 
  324.  
  325. /** If possible, move the fully-merged run of changes back to a 
  326. * corresponding run in the other file. */ 
  327. while ($corresponding < $i) { 
  328. $changed[--$start] = 1; 
  329. $changed[--$i] = 0; 
  330. assert('$j > 0'); 
  331. while ($other_changed[--$j]) { 
  332. continue; 
  333. assert('$j >= 0 && !$other_changed[$j]'); 
  334.