php正则替换处理HTML页面的方法

5年以前  |  阅读数:592 次  |  编程语言:PHP 

本文实例讲述了php正则替换处理HTML页面的方法。分享给大家供大家参考。具体如下:


    <?php
    // Abort unless the framework front controller has defined BASEPATH.
    defined('BASEPATH') or exit('No direct script access allowed');
     /**
      * HTML rewriting helper (CodeIgniter library).
      *
      * Scans an HTML document for references to other project files and, when
      * the module's model knows the referenced file, replaces the reference
      * with the URL of the controller action that serves it. References the
      * model does not know (and absolute URLs) are left exactly as written.
      *
      * Handled constructs:
      *   1. <img src=...>
      *   2. <a href=...>
      *   3. <iframe src=...>
      *   4. <frame src=...>
      *   5. JavaScript window.open('...')
      *   6. CSS background...url('...')
      */
     class Myreplace {
         /** @var array Module identifier => model loaded (under the alias "model") for it. */
         private $moudle_models = array(
             'udata'   => 'mupload_data',
             'tdata'   => 'taskdata',
             'tresult' => 'taskresult',
             'dresult' => 'dmsresult',
         );

         /** @var object CodeIgniter super object returned by get_instance(). */
         private $CI;
         /** @var string HTML passed to the most recent my_replace() call. */
         private $content;
         /** @var string Directory (relative path) of the document being rewritten. */
         private $relative_dirname;
         /** @var int Project id used for lookups and appended to generated URLs. */
         private $projectid;
         /** @var string Lower-cased module identifier of the current call. */
         private $moudle;

         public function __construct() {
             $this->CI =& get_instance();
         }

         /**
          * Rewrite all supported references in an HTML document.
          *
          * @param string $content   HTML content
          * @param string $relative  directory of the document, relative to the project root
          * @param int    $projectid project id
          * @param string $moudle    module identifier: udata, tdata, tresult or dresult
          *                          (case-insensitive)
          * @return string the rewritten HTML
          * @throws InvalidArgumentException when $moudle is not a known module
          */
         public function my_replace($content, $relative, $projectid, $moudle) {
             // Normalise once so validation, model selection and generated URLs agree.
             $moudle = strtolower((string) $moudle);
             if (!isset($this->moudle_models[$moudle])) {
                 throw new InvalidArgumentException('Unknown module identifier: ' . $moudle);
             }

             $this->content = $content;
             $this->relative_dirname = $relative;
             $this->projectid = $projectid;
             $this->moudle = $moudle;

             // NOTE(review): every module's model is loaded under the same alias
             // "model"; confirm the loader copes with my_replace() being called
             // for different modules within one request.
             $this->CI->load->model($this->moudle_models[$moudle], 'model');

             // Callback method => pattern, applied in this order.
             $rules = array(
                 'image_replace'  => '/<img(.+?)src=([\'\" ])?(.+?)([ >]+?)/i',
                 'html_replace'   => '/<a(.+?)href=([\'\" ])?(.+?)([ >]+?)/i',
                 'iframe_replace' => '/<iframe(.+?)src=([\'\" ])?(.+?)([ >]+?)/i',
                 'frame_replace'  => '/<frame(.+?)src=([\'\" ])?(.+?)([ >]+?)/i',
                 'js_replace'     => '/window\.open([( ]+?)([\'" ]+?)(.+?)([ )]+?)/i',
                 'css_replace'    => '/background(.+?)url([( ])([\'" ]+?)(.+?)([ )]+?)/i',
             );
             foreach ($rules as $callback => $pattern) {
                 $rewritten = preg_replace_callback($pattern, array($this, $callback), $content);
                 // preg_replace_callback() returns null on a PCRE error; keep the
                 // content produced so far instead of discarding the document.
                 if ($rewritten !== null) {
                     $content = $rewritten;
                 }
             }
             return $content;
         }

         /** Callback for <img src=...>. */
         private function image_replace($matches) {
             return $this->rewrite_attribute($matches, '<img', 'src', true, false);
         }

         /** Callback for <a href=...>; a trailing #anchor is ignored for the lookup. */
         private function html_replace($matches) {
             return $this->rewrite_attribute($matches, '<a', 'href', false, true);
         }

         /** Callback for <iframe src=...>; a trailing #anchor is ignored for the lookup. */
         private function iframe_replace($matches) {
             return $this->rewrite_attribute($matches, '<iframe', 'src', false, true);
         }

         /** Callback for <frame src=...>; a trailing #anchor is ignored for the lookup. */
         private function frame_replace($matches) {
             return $this->rewrite_attribute($matches, '<frame', 'src', false, true);
         }

         /**
          * Callback for window.open(...). Only the first argument (the URL) is
          * rewritten; any further arguments are emitted unchanged.
          */
         private function js_replace($matches) {
             if (count($matches) < 5) {
                 return $matches[0];
             }
             $args = explode(',', $matches[3]);
             // Drop the closing quote (and any escaping backslash) captured with the URL.
             $href = rtrim($args[0], '\'"\\');
             // Links carrying an anchor are deliberately left untouched.
             if (strpos($href, '#') !== false) {
                 return $matches[0];
             }
             $id = $this->find_id($href);
             if ($id === null) {
                 return $matches[0];
             }
             // Re-attach the remaining arguments verbatim; emit no comma when the
             // call had a single argument.
             $rest = count($args) > 1 ? ',' . implode(',', array_slice($args, 1)) : '';
             return 'window.open' . $matches[1] . $matches[2]
                 . $this->target_url($this->moudle === 'dresult' ? 'readfile' : 'txtfile', $id)
                 . $matches[2] . $rest . $matches[4];
         }

         /** Callback for CSS background...url('...'). */
         private function css_replace($matches) {
             if (count($matches) < 6) {
                 return $matches[0];
             }
             $id = $this->find_id(rtrim($matches[4], '\'"/'));
             if ($id === null) {
                 return $matches[0];
             }
             return 'background' . $matches[1] . 'url' . $matches[2] . $matches[3]
                 . $this->target_url($this->moudle === 'dresult' ? 'readpic' : 'picfile', $id)
                 . $matches[3] . $matches[5];
         }

         /**
          * Shared implementation of the four tag callbacks.
          *
          * @param array  $matches      1 = text between tag name and attribute,
          *                             2 = opening quote (may be empty),
          *                             3 = URL plus closing quote, 4 = terminator
          * @param string $open         literal tag opener, e.g. '<img'
          * @param string $attr         attribute name, 'src' or 'href'
          * @param bool   $is_image     true selects the picture actions, false the file actions
          * @param bool   $strip_anchor true to ignore everything from the last '#'
          * @return string replacement text; the original match when nothing is rewritten
          */
         private function rewrite_attribute($matches, $open, $attr, $is_image, $strip_anchor) {
             if (count($matches) < 5) {
                 return $matches[0];
             }
             $path = rtrim($matches[3], '\'"/');
             if ($strip_anchor) {
                 $hash = strrpos($path, '#');
                 if ($hash !== false) {
                     $path = substr($path, 0, $hash);
                 }
             }
             $id = $this->find_id($path);
             if ($id === null) {
                 // Unknown target: return the original text so anchors, trailing
                 // slashes and quoting survive untouched.
                 return $matches[0];
             }
             if ($this->moudle === 'dresult') {
                 $action = $is_image ? 'readpic' : 'readfile';
             } else {
                 $action = $is_image ? 'picfile' : 'txtfile';
             }
             return $open . $matches[1] . $attr . '=' . $matches[2]
                 . $this->target_url($action, $id)
                 . $matches[2] . $matches[4];
         }

         /**
          * Resolve a reference against the document directory and ask the
          * module model for the id stored under that path.
          *
          * @param string $path reference as written in the document, quotes removed
          * @return mixed the id reported by the model, or null when there is none
          */
         private function find_id($path) {
             // An empty path, a URL with a scheme (http:, mailto:, javascript:)
             // or a protocol-relative URL cannot name a project file.
             if ($path === '' || preg_match('#^(?:[a-z][a-z0-9+.\-]*:|//)#i', $path)) {
                 return null;
             }
             // Trim first so a trailing slash is not mistaken for a directory level.
             $dir = rtrim($this->relative_dirname, '/');
             $levels_up = substr_count($path, '../');
             for ($i = 0; $i < $levels_up; $i++) {
                 $cut = strrpos($dir, '/');
                 $dir = ($cut === false) ? '' : substr($dir, 0, $cut);
             }
             $relativepath = rtrim($dir, '/') . '/' . ltrim($path, './');
             $id = $this->CI->model->get_id_by_path_and_project($relativepath, $this->projectid);
             return empty($id) ? null : $id;
         }

         /**
          * Build the URL of the controller action that serves a stored file.
          *
          * @param string $action controller method: readpic, picfile, readfile or txtfile
          * @param mixed  $id     id returned by the model
          * @return string
          */
         private function target_url($action, $id) {
             return $this->CI->config->item("base_url") . "cdms/" . $this->moudle
                 . "/" . $action . "/" . $id . "?pid=" . $this->projectid;
         }
     }
    /* End of Myreplace.php */
    /* Location: /application/libraries/Myreplace.php */

PS:这里再为大家提供2款非常方便的正则表达式工具供大家参考使用:

JavaScript正则表达式在线测试工具:
http://tools.jb51.net/regex/javascript

正则表达式在线生成工具:
http://tools.jb51.net/regex/create_reg

希望本文所述对大家的php程序设计有所帮助。

 相关文章:
PHP分页显示制作详细讲解
SSH 登录失败:Host key verification failed
将二进制数据转为16进制以便显示
获取IMSI
获取IMEI
Java生成UUID
PHP自定义函数获取搜索引擎来源关键字的方法
让你成为最厉害的git提交人
在Zeus Web Server中安装PHP语言支持
再谈PHP中单双引号的区别详解
指定应用ID以获取对应的应用名称
Yii2汉字转拼音类的实例代码
Python 2与Python 3版本和编码的对比
php+ajax+json 详解及实例代码
php封装的page分页类完整实例
PHP设计模式之工厂模式与单例模式
php数组合并array_merge()函数使用注意事项
PHP实现简单爬虫的方法
php实现数组中索引关联数据转换成json对象的方法
wget使用技巧