// wg-basic/basic-util/src/main/java/ink/wgink/util/HtmlHelper.java
//
// 107 lines
// 3.5 KiB
// Java

package ink.wgink.util;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Static helpers for extracting plain text from HTML and for re-indenting HTML fragments.
 */
public class HtmlHelper {

    /** Matches a whole {@code <script>...</script>} element, content included. */
    private static final Pattern P_SCRIPT = Pattern.compile("<script[^>]*?>[\\s\\S]*?<\\/script>",
            Pattern.CASE_INSENSITIVE);

    /** Matches a whole {@code <style>...</style>} element, content included. */
    private static final Pattern P_STYLE = Pattern.compile("<style[^>]*?>[\\s\\S]*?<\\/style>",
            Pattern.CASE_INSENSITIVE);

    /** Matches any single HTML tag. */
    private static final Pattern P_HTML = Pattern.compile("<[^>]+>", Pattern.CASE_INSENSITIVE);

    /**
     * Matches a run of whitespace (spaces, tabs, carriage returns, line feeds, ...).
     * {@code \s+} removes exactly the same characters as the former
     * {@code \s*|\t|\r|\n} without matching the empty string at every position.
     */
    private static final Pattern P_SPACE = Pattern.compile("\\s+");

    /** Matches whitespace that sits between a closing {@code >} and the next opening {@code <}. */
    private static final Pattern P_BETWEEN_TAGS = Pattern.compile(">\\s+<");

    /**
     * Extracts the plain text of an HTML string.
     * <p>
     * Script elements, style elements and all remaining tags are removed, and then every
     * whitespace character is removed as well, so words that were separated only by whitespace
     * end up joined. HTML entities are left as they are (no decoding is performed).
     * </p>
     * <p>
     * Originally written 2018-06-14.
     * </p>
     *
     * @param html the HTML source; may be {@code null}
     * @return the text with markup and whitespace removed; an empty string when {@code html}
     *         is {@code null}
     * @author WenG
     */
    public static String getText(String html) {
        if (html == null) {
            return "";
        }
        // Script and style elements go first so their content is dropped together with the tags.
        String text = P_SCRIPT.matcher(html).replaceAll("");
        text = P_STYLE.matcher(text).replaceAll("");
        text = P_HTML.matcher(text).replaceAll("");
        return P_SPACE.matcher(text).replaceAll("");
    }

    /**
     * Re-indents an HTML fragment.
     * <p>
     * Whitespace between adjacent tags is removed, the fragment is parsed and serialized by
     * jsoup, and each resulting line is re-indented: every leading whitespace character of the
     * jsoup line becomes one {@code indentType} unit, plus {@code indentCount} extra units.
     * Lines are trimmed, so whitespace-sensitive content is not preserved verbatim.
     * </p>
     *
     * @param code        the HTML fragment to format
     * @param indentType  the string used for one indent unit; a single space when {@code null}
     * @param indentCount extra indent units added to every line; negative values count as 0
     * @return the formatted HTML with each line terminated by {@code \n}, or {@code null} when
     *         {@code code} is blank or jsoup produces blank output
     */
    public static String formatHtml(String code, String indentType, int indentCount) {
        if (StringUtils.isBlank(code)) {
            return null;
        }
        String compact = P_BETWEEN_TAGS.matcher(code).replaceAll("><");
        String html = Jsoup.parseBodyFragment(compact).body().html();
        if (StringUtils.isBlank(html)) {
            return null;
        }
        String unit = indentType == null ? " " : indentType;
        int extra = Math.max(indentCount, 0);

        StringBuilder result = new StringBuilder(html.length() + 16);
        for (String line : html.split("\n")) {
            String content = line.trim();
            if (!content.isEmpty()) {
                // Depth comes from the leading whitespace rather than the position of the
                // first '<', so lines that start with text keep their nesting level too.
                int units = leadingWhitespace(line) + extra;
                for (int i = 0; i < units; i++) {
                    result.append(unit);
                }
                result.append(content);
            }
            result.append('\n');
        }
        return result.toString();
    }

    /**
     * Counts the leading characters of {@code line} that {@link String#trim()} would strip.
     */
    private static int leadingWhitespace(String line) {
        int count = 0;
        while (count < line.length() && line.charAt(count) <= ' ') {
            count++;
        }
        return count;
    }

    /**
     * Manual demo: formats a sample form snippet with tab indentation and prints the result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String code = "<div id=\"input_1\" class=\"layui-form-item \" data-id=\"input_1\" data-tag=\"input\" data-index=\"0\"> <label class=\"layui-form-label layui-form-required\" style=\"width: 110px;\"><span style=\"color:red;\">*</span>单行文本:</label> <div class=\"layui-input-block\" style=\"margin-left: 110px\"> <input name=\"input_1\" value=\"\" placeholder=\"请输入\" class=\"layui-input\" lay-vertype=\"tips\" lay-verify=\"required\" style=\"width:100%\"> </div></div>";
        String result = formatHtml(code, "\t", 0);
        System.out.println(result);
    }
}