PHP HTML转UBB 2.0


之前版本对于换行处理有些问题,进行了一些改进

<?php

/**
 * html 转 ubb 格式
 *
 * @param string $str
 * @return string
 */
function ubb_encode($str) &#123;
    if (empty($str)) &#123;
        return $str;
    &#125;
    $match_map = array(
        'strong' => array('start' => '[b]', 'end' => '[/b]',),
        'em' => array('start' => '[i]', 'end' => '[/i]',),
        'text-align' => array('start' => '[align=%s]', 'end' => '[/align]',),
        'font-size' => array(
            'function' => 'px_to_em',
            'start' => '[size=%s]', 'end' => '[/size]',
        ),
        'background-color' => array(
            'function' => 'rgb_to_hex',
            'start' => '[backcolor=%s]', 'end' => '[/backcolor]',
        ),
        'color' => array(
            'function' => 'rgb_to_hex',
            'start' => '[color=%s]', 'end' => '[/color]',
        ),
        'text-decoration' => array(
            'condition' => 'underline', 'style' => '',
            'start' => '[u]', 'end' => '[/u]',
        ),
    );
    $tags_list = html_tags_grap($str);  // 抓取HTML中的标签
    if (empty($tags_list)) &#123;  // 没有抓取到标签
        return $str;
    &#125;
    $end_pop = array();  // 结束标签
    $ubb_str = '';   // 转编码后的字符串
    $is_p = 0;  // 是否是p标签
    foreach ($tags_list as $tag_info) &#123;
        $tag_name = $tag_info['name']; // 标签名
        $contents = trim($tag_info['content']); // 内容
        if ($tag_info['is_end'] == 1) &#123; // 标签结束
            $ubb_str .= array_pop($end_pop) . $contents;
            if ($tag_name == 'p') &#123;
                $is_p = 0;
            &#125;
            continue;
        &#125;
        $style = $tag_info['style']; // 样式
        // 标签处理
        switch ($tag_name) &#123;
            case 'a' : // a 标签
                $link = $tag_info['self'];
                if (stripos($link, 'mailto') === FALSE) &#123;
                    $ubb_str .= '[url=' . $link . ']' . $contents;
                    $end_pop[] = '[/url]';
                    continue;
                &#125;
                // 处理 邮件地址
                $ubb_str .= '[email=' . str_replace('mailto:', '', $link) . ']' . $contents;
                $end_pop[] = '[/email]';
                continue;
            case 'br':  // 换行
                if ($is_p == 1) &#123;  // p标签内的br不换行
                    $ubb_str .= $contents;
                &#125; else &#123;
                    $ubb_str .= PHP_EOL . $contents;
                &#125;
                continue;
            case 'strong' :  // 粗体
            case 'u' :  // 下划线
            case 'em' :  // 斜体
            case 'b' :  // 粗体
            case 'i' :
                if (isset($match_map[$tag_name])) &#123;  // 是否是支持的样式
                    $match_tags = $match_map[$tag_name];
                    $ubb_str .= $match_tags['start'] . $contents;
                    $end_pop[] = $match_tags['end'];
                &#125; else &#123;
                    $ubb_str .= '[' . $tag_name . ']' . $contents;
                    $end_pop[] = '[/' . $tag_name . ']';
                &#125;
                continue;
            case 'img':
                $link = $tag_info['self'];
                $ubb_str .= '[img]' . $link . '[/img]' . $contents;
                continue;
            case 'p' :
                $ubb_str .= PHP_EOL;
                $is_p = 1;  // p标签
            case 'div' :
            case 'section':
            case 'span' :
                if (empty($style)) &#123; // 空样式,不用处理样式
                    $ubb_str .= $contents;
                    continue;
                &#125;
                $tag_name_end = '';
                foreach ($style as $single_style) &#123;
                    if (empty($single_style)) &#123;
                        continue;
                    &#125;
                    $style_name = $single_style['name'];  // 样式名
                    if (!isset($match_map[$style_name])) &#123;  // 不支持处理的样式,则略过
                        continue;
                    &#125;
                    $style_value = $single_style['value']; // 样式值
                    if (isset($match_map[$style_name]['condition'])) &#123;  // 样式支持的值
                        if ($match_map[$style_name]['condition'] != $style_value) &#123;
                            continue;
                        &#125;
                    &#125;
                    if (isset($match_map[$style_name]['function'])) &#123;  // 需要特殊处理
                        $function = $match_map[$style_name]['function'];
                        if (!empty($function)) &#123;
                            $style_value = call_user_func($function, $style_value);
                        &#125;
                    &#125;
                    if (isset($match_map[$style_name]['style'])) &#123;
                        $style_value = $match_map[$style_name]['style'];
                    &#125;
                    $ubb_str .= sprintf($match_map[$style_name]['start'], $style_value);  // 样式开始标签
                    $tag_name_end = $match_map[$style_name]['end'] . $tag_name_end;  // 结束标签
                &#125;
                $ubb_str .= $contents;
                $end_pop[] = $tag_name_end;
                continue;
            default :  // 不支持的标签
                $ubb_str .= $contents;
                continue;
        &#125;
    &#125;
    $filter_keymap = array(
        '/&amp;/i' => '&',
        '/&lt;/i' => '<',
        '/&gt;/i' => '>',
        '/&nbsp;/i' => ' ',
        '/&#160;/' => ' ', // 空格
        '/\<[^>]*?\>/i' => '',
        '/\&#\d+;/' => '', // 特殊符号
    );
    return preg_replace(array_keys($filter_keymap), array_values($filter_keymap), $ubb_str);
&#125;
<?php

/**
 * 获取 html 中的标签
 *
 * @param string $str
 * @return array
 */
function html_tags_grap($str) &#123;
    preg_match_all('/(\<[\/]?([a-zA-Z]+)([^>]*)\>)([^\<]*)/', $str, $matchs);
    if (!isset($matchs[1]) || empty($matchs[1])) &#123;
        return array();
    &#125;
    $tag_list = array();
    foreach ($matchs[1] as $key => $tag) &#123;
        $tag_name = strtolower($matchs[2][$key]);  // 标签
        $content = trim($matchs[4][$key]);  // 内容
        if (strpos($tag, '</') === 0) &#123;  // 结束标签
            $tag_list[] = array(
                'tag' => $tag,
                'name' => $tag_name,
                'content' => $content,
                'is_end' => 1, // 1 结束
            );
            continue;
        &#125;
        // 处理非结束标签
        $style = trim($matchs[3][$key]);  // 样式
        if (empty($style)) &#123;  // 没有样式
            $tag_list[] = array(
                'tag' => $tag,
                'name' => $tag_name,
                'content' => $content,
                'is_end' => 0, // 0 非结束
            );
            continue;
        &#125;
        // 特殊处理
        preg_match_all('/(style|href|src|align)=\"([^\"]+)\"/i', $style, $style_match);
        if (!isset($style_match[1]) || empty($style_match[1])) &#123; // 没有支持的内容
            $tag_list[] = array(
                'tag' => $tag,
                'name' => $tag_name,
                'content' => $content,
                'is_end' => 0, // 0 非结束
            );
            continue;
        &#125;
        $style_preg = array();
        foreach ($style_match[1] as $sk => $sv) &#123;
            $style_preg[$sv] = $style_match[2][$sk];
        &#125;
        $style_arr = array();
        if (isset($style_preg['style'])) &#123;  // 有样式
            $style_split = explode(';', $style_preg['style']);  // 多样式分割
            foreach ($style_split as $single_style) &#123;
                if (empty($single_style)) &#123;
                    continue;
                &#125;
                // 示例: text-decoration: underline
                list($style_name, $style_value) = explode(':', $single_style);  // 样式分割, key=>value
                $style_name = trim($style_name);  // 样式名
                $style_arr[] = array(
                    'name' => $style_name,
                    'value' => trim($style_value),
                );
            &#125;
        &#125;
        $self = '';
        if ($tag_name == 'img') &#123;  // 图片标签
            $src = $style_preg['src'];
            if (!filter_var($src, FILTER_VALIDATE_URL)) &#123;  // 非链接
                if (strpos($src, '/') === false || strpos($src, ' ') !== false) &#123;  // 非相对路径
                    continue;
                &#125;
            &#125;
            $self = $src;
        &#125; elseif ($tag_name == 'a') &#123;  // a标签
            if (isset($style_preg['href'])) &#123;
                $self = $style_preg['href'];
            &#125;
        &#125;
        $tag_list[] = array(
            'tag' => $tag,
            'name' => $tag_name,
            'self' => $self,
            'style' => $style_arr,
            'content' => $content,
            'is_end' => 0,
        );
    &#125;
    return $tag_list;
&#125;

<?php

/**
* RGB转 十六进制
*
* @param $rgb RGB颜色的字符串 如:rgb(255,255,255);
* @return string 十六进制颜色值 如:#FFFFFF
*/
function rgb_to_hex($rgb) &#123;
    $regexp = "/^rgb\(([0-9]&#123;0,3&#125;)\,\s*([0-9]&#123;0,3&#125;)\,\s*([0-9]&#123;0,3&#125;)\)/";
    $re = preg_match($regexp, $rgb, $match);
    if ($re < 1) &#123;
        return '#FFFFFF';
    &#125;
    $re = array_shift($match);
    $hexColor = "#";
    $hex = array('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F');
    for ($i = 0; $i < 3; $i++) &#123;
        $r = null;
        $c = $match[$i];
        $hexAr = array();
        while ($c > 16) &#123;
            $r = $c % 16;
            $c = ($c / 16) >> 0;
            array_push($hexAr, $hex[$r]);
        &#125;
        array_push($hexAr, $hex[$c]);
        $ret = array_reverse($hexAr);
        $item = implode('', $ret);
        $item = str_pad($item, 2, '0', STR_PAD_LEFT);
        $hexColor .= $item;
    &#125;
    return $hexColor;
&#125;
<?php

/**
* 字体 转换
*
* @param $px_size 字体px大小 如:24px;
* @return string em大小 如:1
*/
function px_to_em($px_size) &#123;
    $px_size = intval($px_size);
    $size_keymap = array(
        7 => 34,
        6 => 24,
        5 => 16,
        4 => 14,
        3 => 12,
        2 => 10,
        1 => 8,
    );
    $em = 4;  // 默认是4
    foreach ($size_keymap as $em => $px) &#123;
        if ($px_size > $px) &#123;
            break;
        &#125;
    &#125;
    return $em;
&#125;

参考文档


Author: Itaken
Reprint policy: All articles in this blog are used except for special statements CC BY 4.0 reprint polocy. If reproduced, please indicate source Itaken !
  TOC目录