NXS_HtmlFixer

The NXS_HtmlFixer class of NextScripts: Social Networks Auto-Poster.

Defined (1)

The class is defined in the following location(s).

/nxs_functions.php  
  // NXS_HtmlFixer repairs an HTML fragment: tags are parsed into a flat node table ($matrix),
  // badly nested nodes are re-parented, and the table is serialised back to markup.
  // Declared state: $dirtyhtml (current input), $fixedhtml, $allowed_styles (substrings used to
  // whitelist style declarations), $matrix (node table), $debug (echo diagnostics when true),
  // $fixedhtmlDisplayCode.
  // NOTE(review): the methods below read and write $this->fixedxhtml, which is not declared
  // here and is therefore created dynamically; the declared $fixedhtml is only ever reset to "".
  // NOTE(review): brace-only lines appear to be missing from this listing, so the class and
  // most multi-line methods are not closed in the text shown here.
  1. class NXS_HtmlFixer { public $dirtyhtml; public $fixedhtml; public $allowed_styles; private $matrix; public $debug; private $fixedhtmlDisplayCode; 
  /**
   * Start with empty input/output buffers, diagnostics switched off and an
   * empty style whitelist.
   */
  public function __construct() {
    $this->allowed_styles = array();
    $this->debug = false;
    $this->dirtyhtml = "";
    $this->fixedhtml = "";
    $this->fixedhtmlDisplayCode = "";
  }
  /**
   * Repair an HTML fragment and return the cleaned markup.
   *
   * Up to 10 parse/repair passes are run. Each pass feeds the previous pass's
   * output back in, strips the temporary <root> wrapper and re-applies
   * removeSpacesAndBadTags(). The loop stops after the first pass that reports
   * no nesting errors.
   *
   * @param string $dirtyhtml Markup to repair.
   * @return string Repaired markup; "" when the input is empty after cleaning.
   */
  public function getFixedHtml($dirtyhtml) {
    $this->dirtyhtml = $dirtyhtml;
    $this->fixedhtml = "";
    $this->fixedhtmlDisplayCode = "";
    // charByCharJob() returns early without touching the working buffer when the
    // cleaned input is empty. Reset it here so a reused instance cannot hand back
    // the previous call's result (and so the first read is never of an undefined
    // property).
    $this->fixedxhtml = "";
    // Empty the node table instead of unset()ing the declared property, which
    // would make the next is_array() read hit an undefined property.
    $this->matrix = array();

    for ($pass = 0; $pass < 10; $pass++) {
      if ($pass > 0) {
        $this->dirtyhtml = $this->fixedxhtml;
      }
      $errorsFound = $this->charByCharJob();
      $this->fixedxhtml = str_replace(array('<root>', '</root>'), '', $this->fixedxhtml);
      $this->fixedxhtml = $this->removeSpacesAndBadTags($this->fixedxhtml);
      if (!$errorsFound) {
        break;
      }
    }
    return $this->fixedxhtml;
  }
  /**
   * Lower-case the part of a token that precedes its first "=", leaving the
   * "=" and everything after it untouched. A token without "=" is lower-cased
   * as a whole.
   *
   * @param string $m Token such as `HREF="Page.html"`.
   * @return string Token with only the leading name part lower-cased.
   */
  private function fixStrToLower($m) {
    $eq = strpos($m, '=');
    if ($eq === false) {
      return strtolower($m);
    }
    return strtolower(substr($m, 0, $eq)) . substr($m, $eq);
  }
  // fixQuotes($s): add missing quotes around the value of an attribute token ("name=value").
  // A token without "=" is returned unchanged. The text after the first "=" (up to a line
  // break) is taken as the value and trimmed; empty values are left alone.
  // Returns the token with quotes added where they were missing.
  8. private function fixQuotes($s) { $q = "\""; if (!stristr($s, "=")) return $s; $out = $s; preg_match_all("|=(.*)|", $s, $o, PREG_PATTERN_ORDER); 
  // $lc remembers the tag terminator that followed the value (">" or "/>"); only the final
  // ">" character is cut from $t, so for "/>" the "/" stays part of $t.
  9. for ($i = 0; $i< count ($o[1]); $i++) { $t = trim ( $o[1][$i] ) ; $lc=""; if ($t!="") { if ($t[strlen($t)-1]==">") { $lc= ($t[strlen($t)-2].$t[strlen($t)-1])=="/>" ? "/>" : ">" ; $t=substr($t, 0, -1);} 
  // No opening quote: prepend a double quote; otherwise remember the value's own quote
  // character in $q. No closing quote: append $q before the terminator. Both rewrites use
  // str_replace, i.e. they act on every occurrence of the searched text inside $out.
  // NOTE(review): if $t is exactly ">" it becomes "" after the cut and $t[0] is then read on
  // an empty string - confirm whether such tokens can reach this method.
  10. if (($t[0]!="\"")&&($t[0]!="'")) $out = str_replace( $t, "\"".$t, $out); else $q=$t[0]; if (($t[strlen($t)-1]!="\"")&&($t[strlen($t)-1]!="'")) $out = str_replace( $t.$lc, $t.$q.$lc, $out); 
  // NOTE(review): the method's closing brace is not present in this listing.
  11. }} return $out; 
  /**
   * Normalise a single tag: drop border/borderColor attributes, strip escaped
   * quotes from href values, lower-case the tag and attribute names, add
   * missing attribute quotes and tidy the spacing next to "<" and ">".
   *
   * @param string $t Raw tag text including "<" and ">".
   * @return string The normalised tag.
   */
  private function fixTag($t) {
    $withoutBorders = preg_replace(
      array('/borderColor=([^ >])*/i', '/border=([^ >])*/i'),
      array('', ''),
      $t
    );

    // Split on whitespace while keeping quoted attribute values in one piece.
    preg_match_all('/(?:"[^"]*"|\'[^\']*\'|[^"\'\s]+)+/', $withoutBorders, $found);
    $pieces = $found[0];

    // Escaped-quote spellings that are unescaped inside href tokens, tried in this order.
    $escapedQuotes = array('\\\\\\"', '\\"', '\"');

    $rebuilt = "";
    foreach ($pieces as $piece) {
      foreach ($escapedQuotes as $escaped) {
        if (strpos($piece, 'href=' . $escaped) !== false) {
          $piece = str_replace($escaped, '"', $piece);
        }
      }
      $piece = $this->fixStrToLower($piece);
      if (stristr($piece, "=")) {
        $piece = $this->fixQuotes($piece);
      }
      $rebuilt .= $piece . " ";
    }

    $rebuilt = preg_replace("/<( )*/i", "<", $rebuilt);
    $rebuilt = preg_replace("/( )*>/i", ">", $rebuilt);
    return trim($rebuilt);
  }
  /**
   * Return the text found between the first occurrence of $tag1 and the next
   * occurrence of $tag2 after it. Both markers are matched case-insensitively.
   *
   * @param string $tag1  Opening marker.
   * @param string $tag2  Closing marker.
   * @param string $tutto Text to search.
   * @return string The enclosed text, or '' when either marker is not found
   *                (an empty enclosed text also yields '').
   */
  private function extractChars($tag1, $tag2, $tutto) {
    $fromOpening = stristr($tutto, $tag1);
    if (!$fromOpening) {
      return '';
    }
    $afterOpening = substr($fromOpening, strlen($tag1));

    $fromClosing = stristr($afterOpening, $tag2);
    if (!$fromClosing) {
      return '';
    }
    return substr($afterOpening, 0, strlen($afterOpening) - strlen($fromClosing));
  }
  /**
   * Merge every style="..." attribute of a tag into a single style attribute
   * placed where the first one was.
   *
   * Each declaration block gets a trailing ";" if it lacks one. When
   * $allowed_styles is non-empty, only declarations containing one of its
   * entries (case-insensitive substring match) are kept. If a style attribute
   * has no closing quote, a quote is inserted in front of the tag terminator
   * and merging stops there.
   *
   * Attributes are located and replaced by position, so an empty style="" is
   * no longer mistaken for an unterminated attribute, upper-case STYLE= no
   * longer loops forever, and identical duplicates are merged into one.
   *
   * @param string $s Tag text.
   * @return string Tag text with at most one merged style attribute.
   */
  private function mergeStyleAttributes($s) {
    $opener = 'style="';
    $merged = "";
    $seen = 0;

    while (($start = stripos($s, $opener)) !== false) {
      $valueStart = $start + strlen($opener);
      $valueEnd = strpos($s, '"', $valueStart);
      if ($valueEnd === false) {
        // Missing closing quote: add it before ">" / "/>" and stop, then fall
        // through so a placeholder left by an earlier attribute is still restored.
        $s = preg_replace("/(\/)?>/i", "\"\\1>", $s);
        break;
      }

      $value = substr($s, $valueStart, $valueEnd - $valueStart);
      // The first attribute leaves a placeholder behind; later ones are removed.
      $s = substr_replace($s, ($seen == 0) ? "##PUTITHERE##" : "", $start, $valueEnd - $start + 1);

      if ($value != "") {
        if (!preg_match("/;$/i", $value)) {
          $value .= ";";
        }
        $merged .= $value;
      }
      $seen++;
    }

    if (count($this->allowed_styles) > 0) {
      $kept = "";
      foreach (explode(';', $merged) as $declaration) {
        foreach ($this->allowed_styles as $allowed) {
          if (stripos($declaration, $allowed) !== false) {
            $kept .= $declaration . ';';
            break;
          }
        }
      }
      $merged = $kept;
    }

    if ($seen > 0) {
      $s = str_replace("##PUTITHERE##", "style=\"" . $merged . "\"", $s);
    }
    return $s;
  }
  /**
   * Make img, input, br and hr tags self-closing by turning ">" into "/>"
   * when the tag does not already contain "/>". Other tag types are returned
   * unchanged.
   *
   * @param string $tag  Tag text.
   * @param string $tipo Tag type as returned by getTypeOfTag().
   * @return string The tag, self-closed where required.
   */
  private function fixAutoclosingTags($tag, $tipo="") {
    $selfClosingTypes = array("img", "input", "br", "hr");
    if (!in_array($tipo, $selfClosingTypes)) {
      return $tag;
    }
    if (stristr($tag, '/>')) {
      return $tag;
    }
    return str_replace('>', '/>', $tag);
  }
  /**
   * Extract the tag name: strip every "<", ">" and "/" from the tag, trim it
   * and return the text up to the first space.
   *
   * @param string $tag Tag text such as `<div class="x">` or `</p>`.
   * @return string The tag name.
   */
  private function getTypeOfTag($tag) {
    $bare = trim(preg_replace("/[\>\<\/]/i", "", $tag));
    list($type) = explode(" ", $bare);
    return $type;
  }
  /**
   * Walk the node table and undo forbidden nestings.
   *
   * When a node sits inside a parent type that is not allowed for it (for
   * example a div inside a p, or a b inside a strong), the parent's tag is
   * replaced by an HTML comment marker, its type is blanked, and the node plus
   * every later node sharing that parent are attached to the grandparent.
   *
   * @return int Number of forbidden nestings found.
   */
  private function checkTree() {
    $inlineOrParagraph = array("p", "b", "i", "font", "u", "small", "strong", "em");
    // Child tag type => parent tag types it may not be nested in.
    $forbiddenParents = array(
      "div"    => $inlineOrParagraph,
      "b"      => array("b", "strong"),
      "strong" => array("b", "strong"),
      "i"      => array("i", "em"),
      "em"     => array("i", "em"),
      "p"      => $inlineOrParagraph,
      "table"  => array("p", "b", "i", "font", "u", "small", "strong", "em", "tr", "table"),
    );

    $errorsCounter = 0;
    for ($node = 1; $node < count($this->matrix); $node++) {
      $childType = $this->matrix[$node]["tagType"];
      if (!isset($forbiddenParents[$childType])) {
        continue;
      }

      $parentIndex = $this->matrix[$node]["parentTag"];
      $parentType = $this->matrix[$parentIndex]["tagType"];
      if (!in_array($parentType, $forbiddenParents[$childType])) {
        continue;
      }

      $errorsCounter++;
      if ($this->debug) echo "<div style='color:#ff0000'>Found a <b>".$childType."</b> tag inside a <b>".htmlspecialchars($parentType)."</b> tag at node $node: MOVED</div>";

      $grandParent = $this->matrix[$parentIndex]["parentTag"];
      if ($this->debug) echo "<div style='color:#ff0000'>Every node that has parent ".$parentIndex." will have parent ".$grandParent."</div>";

      // Neutralise the offending parent: its tag becomes a comment marker and it
      // loses its type, so no closing tag is emitted for it later.
      $this->matrix[$parentIndex]["tag"] = "<!-- T A G \"".$parentType."\" R E M O V E D -->";
      $this->matrix[$parentIndex]["tagType"] = "";

      // Re-parent from the end of the table back to the current node.
      for ($other = count($this->matrix) - 1; $other >= $node; $other--) {
        if ($this->matrix[$other]["parentTag"] == $parentIndex) {
          $this->matrix[$other]["parentTag"] = $grandParent;
        }
      }
    }
    return $errorsCounter;
  }
  /**
   * Serialise the subtree below a node back to markup.
   *
   * For every table entry whose parent is $parentTag this emits its leading
   * text, its tag (if any) and its trailing text. Entries that carry a tag
   * are followed by their own children and, unless the type is blank or one
   * of br/img/hr/input, by a closing tag.
   *
   * @param int|string $parentTag Index of the parent node in the node table.
   * @return string Markup of all descendants of that node.
   */
  private function findSonsOf($parentTag) {
    $markup = "";
    $voidTypes = array("br", "img", "hr", "input");

    for ($n = 1; $n < count($this->matrix); $n++) {
      $node = $this->matrix[$n];
      if ($node["parentTag"] != $parentTag) {
        continue;
      }

      $hasTag = ($node["tag"] != "");
      $markup .= $node["pre"];
      if ($hasTag) {
        $markup .= $node["tag"];
      }
      $markup .= $node["post"];

      if (!$hasTag) {
        continue;
      }
      $markup .= $this->findSonsOf($n);
      if ($node["tagType"] != "" && !in_array($node["tagType"], $voidTypes)) {
        $markup .= "</" . $node["tagType"] . ">";
      }
    }
    return $markup;
  }
  // findSonsOfDisplayCode($parentTag): diagnostic counterpart of findSonsOf(). Builds an HTML
  // rendering of the subtree below $parentTag in which each node's text and tag are escaped
  // with htmlspecialchars() and labelled with the node index. Returns that HTML as a string.
  // NOTE(review): no caller is visible in this listing, and three closing braces of this
  // method are missing from it, so the exact nesting of the final statements cannot be
  // confirmed from here.
  43. private function findSonsOfDisplayCode($parentTag) { $out= ""; 
  44. for ($i=1;$i<count($this->matrix);$i++) { 
  // One wrapper <div> per child of $parentTag, prefixed with the child's index.
  45. if ($this->matrix[$i]["parentTag"]==$parentTag) { $out.= "<div style=\"padding-left:15\"><span style='float:left;background-color:#FFFF99;color:#000;'>{$i}:</span>"; 
  // Node with a tag: escaped leading text, escaped tag with an index/type badge, escaped trailing text.
  46. if ($this->matrix[$i]["tag"]!="") { if ($this->matrix[$i]["pre"]!="") $out.=htmlspecialchars($this->matrix[$i]["pre"])."<br>"; 
  47. $out.="".htmlspecialchars($this->matrix[$i]["tag"])."<span style='background-color:red; color:white'>{$i} <em>".$this->matrix[$i]["tagType"]."</em></span>"; 
  48. $out.=htmlspecialchars($this->matrix[$i]["post"]); 
  // Node without a tag: only its escaped text.
  49. } else { if ($this->matrix[$i]["pre"]!="") $out.=htmlspecialchars($this->matrix[$i]["pre"])."<br>"; $out.=htmlspecialchars($this->matrix[$i]["post"]);} 
  // Recurse into the children of a tagged node.
  50. if ($this->matrix[$i]["tag"]!="") { $out.="<div>".$this->findSonsOfDisplayCode($i)."</div>\n"; 
  51. if ($this->matrix[$i]["tagType"]!="") { 
  // br, img, hr and input get no closing-tag line.
  52. if (($this->matrix[$i]["tagType"]!="br") && ($this->matrix[$i]["tagType"]!="img") && ($this->matrix[$i]["tagType"]!="hr")&& ($this->matrix[$i]["tagType"]!="input")) 
  53. $out.="<div style='color:red'>".htmlspecialchars("</". $this->matrix[$i]["tagType"].">")."{$i} <em>".$this->matrix[$i]["tagType"]."</em></div>"; 
  54. } $out.="</div>\n"; 
  55. }return $out; 
  // removeSpacesAndBadTags($s): apply a fixed list of regex clean-ups to the trimmed input and
  // return the result. The whole list is applied 10 times, so elements that only become empty
  // after an inner one was removed are caught on a later round.
  // NOTE(review): the method's closing brace is not present in this listing.
  56. private function removeSpacesAndBadTags($s) { $i=0; 
  57. while ($i<10) { $i++; $s = preg_replace ( 
  // Patterns, in order: a single blank; empty <p>, <span>, <strong>, <em>, <font>, <small>
  // elements; <?xml:namespace> in paired and self-closed form; class="MsoNormal"; <o:p></o:p>;
  // <!DOCTYPE ...>; HTML comments; <? ... ?> blocks.
  // NOTE(review): as shown, the first pattern replaces a blank with a blank and the "( )*"
  // groups match blanks; presumably the original file matches &nbsp; here and the entity was
  // decoded by this listing - confirm against nxs_functions.php.
  58. array( '/ /i', '/<p([^>])*>( )*\s*<\/p>/i', '/<span([^>])*>( )*\s*<\/span>/i', '/<strong([^>])*>( )*\s*<\/strong>/i', '/<em([^>])*>( )*\s*<\/em>/i',  
  59. '/<font([^>])*>( )*\s*<\/font>/i', '/<small([^>])*>( )*\s*<\/small>/i', '/<\?xml:namespace([^>])*><\/\?xml:namespace>/i', '/<\?xml:namespace([^>])*\/>/i', '/class=\"MsoNormal\"/i',  
  // The comment pattern also removes the "T A G ... R E M O V E D" markers written by checkTree().
  60. '/<o:p><\/o:p>/i', '/<!DOCTYPE([^>])*>/i', '/<!--(.|\s)*?-->/', '/<\?(.|\s)*?\?>/'),  
  // Replacements line up one-to-one with the 14 patterns: the blank, empty <p> and DOCTYPE
  // become a single space; everything else is deleted.
  61. array(' ', ' ', '', '', '', '', '', '', '', '', '', ' ', '', '' ) , trim($s)); 
  62. }return $s; 
  // charByCharJob(): parse $this->dirtyhtml into the node table $this->matrix, repair the
  // nesting with checkTree() and store the re-serialised markup in $this->fixedxhtml.
  // Returns the number of nesting errors reported by checkTree(); returns without a value
  // (and without touching $this->fixedxhtml) when the cleaned input is empty.
  // Each matrix entry holds: "tagType", "tag", "parentTag" (index of the parent entry),
  // "pre" (text collected before the tag) and "post" (text collected before a closing tag).
  // NOTE(review): many closing braces of this method are missing from this listing, so the
  // exact nesting of the branches below cannot be confirmed from here.
  63. private function charByCharJob() { $s = $this->removeSpacesAndBadTags($this->dirtyhtml); if ($s=="") return; //echo "\r\n=!= ".$s." =!=\r\n<br/>\r\n"; 
  // Wrap the input in a temporary <root> element and create entry 0 as the tree root.
  // $padri is the stack of currently open parent indexes, seeded with "0".
  64. $s = "<root>".$s."</root>"; $contenuto = ""; $ns = ""; $i=0; $j=0; $ss=''; $indexparentTag=0; $padri=array(); array_push($padri, "0"); $this->matrix[$j]["tagType"]=""; 
  65. $this->matrix[$j]["tag"]=""; $this->matrix[$j]["parentTag"]="0"; $this->matrix[$j]["pre"]=""; $this->matrix[$j]["post"]=""; $tags=array(); 
  66. // echo "\r\n=#= ".$s." =#=\r\n<br/>\r\n"; 
  // First scan: copy the text into $ss, rewriting self-closed <param .../> tags as
  // <param ...></param>.
  67. while($i<strlen($s)) { 
  68. if ( $s[$i] =="<") { $contenuto = $ns; $ns = ""; $tag=""; while( $i<strlen($s) && $s[$i]!=">" ) { $tag.=$s[$i]; $i++;} $tag.=$s[$i]; if (stristr($tag, '<param') && stristr($tag, '/>')) $tag = str_replace('/>', '></param>', $tag); 
  69. $ss .= $tag;  
  70. } else $ss .= $s[$i]; $i++; } 
  71. $i=0; $s = $ss; //echo "\r\n== ".$s." ==\r\n<br/>\r\n"; 
  // Second scan: text between tags accumulates in $ns; on "<" it is handed over as $contenuto
  // and the tag is read up to the next ">".
  72. while($i<strlen($s)) { 
  73. if ( $s[$i] =="<") { $contenuto = $ns; $ns = ""; $tag=""; while( $i<strlen($s) && $s[$i]!=">" ) { $tag.=$s[$i]; $i++;} $tag.=$s[$i];  
  // Normalise the tag, then look up how many tags of this type are currently open ($tags).
  74. if($s[$i]==">") { $tag = $this->fixTag($tag); $tagType = $this->getTypeOfTag($tag); $tag = $this->fixAutoclosingTags($tag, $tagType); 
  75. $tag = $this->mergeStyleAttributes($tag); if (!isset($tags[$tagType])) $tags[$tagType]=0; $tagok=true; 
  // A tag is dropped when no tag of its type is open and it contains both "/type>" and "<type".
  // NOTE(review): a plain closing tag such as </p> does not contain "<p", so the third test is
  // false for it and this branch is not taken - confirm whether "!==false" is intended.
  76. if (($tags[$tagType]==0)&&(stristr($tag, '/'.$tagType.'>'))&&(stristr($tag, '<'.$tagType)!==false)) { $tagok=false; if ($this->debug) echo "<div style='color:#ff0000'>Found a closing tag <b>".htmlspecialchars($tag)."</b> at char $i without open tag: REMOVED</div>";} else $tagok=true; 
  // Accepted tag: allocate the next matrix entry with blank fields.
  77. if ($tagok) { $j++; $this->matrix[$j]["pre"]=""; $this->matrix[$j]["post"]=""; $this->matrix[$j]["parentTag"]=""; $this->matrix[$j]["tag"]=""; $this->matrix[$j]["tagType"]=""; 
  // Closing tag: pop the innermost open parent, attach this entry to it and keep the
  // preceding text as "post"; the open count for the type is decremented.
  78. if (stristr($tag, '/'.$tagType.'>')) { $ind = array_pop($padri); $this->matrix[$j]["post"]=$contenuto; $this->matrix[$j]["parentTag"]=$ind; $tags[$tagType]--; 
  // Self-closed tag ("/>"): attach to the current top of the stack without pushing.
  79. } else { if (@preg_match("/".$tagType."\/>$/i", $tag)||preg_match("/\/>/i", $tag)) { $this->matrix[$j]["tagType"]=$tagType; $this->matrix[$j]["tag"]=$tag; 
  80. $indexparentTag = array_pop($padri); array_push($padri, $indexparentTag); $this->matrix[$j]["parentTag"]=$indexparentTag; $this->matrix[$j]["pre"]=$contenuto; $this->matrix[$j]["post"]=""; 
  // Opening tag: count it, attach to the current top of the stack and push its own index so
  // that following entries become its children.
  81. } else { $tags[$tagType]++; $this->matrix[$j]["tagType"]=$tagType; $this->matrix[$j]["tag"]=$tag; $indexparentTag = array_pop($padri); array_push($padri, $indexparentTag); 
  82. array_push($padri, $j); $this->matrix[$j]["parentTag"]=$indexparentTag; $this->matrix[$j]["pre"]=$contenuto; $this->matrix[$j]["post"]=""; } 
  83. } else { $ns.=$s[$i]; } $i++; 
  // Blank any entries left beyond $j (the table may be longer than what this pass filled),
  // then repair the nesting and serialise the tree from the root.
  84. } for ($eli=$j+1;$eli<count($this->matrix);$eli++) { $this->matrix[$eli]["pre"]=""; $this->matrix[$eli]["post"]=""; $this->matrix[$eli]["parentTag"]=""; $this->matrix[$eli]["tag"]=""; $this->matrix[$eli]["tagType"]="";} 
  85. $errorsCounter = $this->checkTree(); $this->fixedxhtml=$this->findSonsOf(0);return $errorsCounter;