107 lines
3.5 KiB
Java
107 lines
3.5 KiB
Java
package ink.wgink.util;
|
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
import org.jsoup.Jsoup;
|
|
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
|
|
public class HtmlHelper {
|
|
|
|
/**
|
|
* 去除script
|
|
*/
|
|
private static final Pattern P_SCRIPT = Pattern.compile("<script[^>]*?>[\\s\\S]*?<\\/script>",
|
|
Pattern.CASE_INSENSITIVE);
|
|
/**
|
|
* 去除style的正则表达式
|
|
*/
|
|
private static final Pattern P_STYLE = Pattern.compile("<style[^>]*?>[\\s\\S]*?<\\/style>",
|
|
Pattern.CASE_INSENSITIVE);
|
|
/**
|
|
* 定义HTML标签的正则表达式
|
|
*/
|
|
private static final Pattern P_HTML = Pattern.compile("<[^>]+>", Pattern.CASE_INSENSITIVE);
|
|
/**
|
|
* 定义空格回车换行符
|
|
*/
|
|
private static final Pattern P_SPACE = Pattern.compile("\\s*|\t|\r|\n",
|
|
Pattern.CASE_INSENSITIVE);
|
|
|
|
/**
|
|
* <p>
|
|
* title HtmlHelper
|
|
* </p>
|
|
* <p>
|
|
* description 获取纯文本
|
|
* </p>
|
|
*
|
|
* @param html
|
|
* @return
|
|
* @author WenG
|
|
* @date 2018年6月14日 下午2:31:32
|
|
* @modifier WenG
|
|
* @date 2018年6月14日 下午2:31:32
|
|
*/
|
|
public static String getText(String html) {
|
|
// 过滤script标签
|
|
Matcher mScript = P_SCRIPT.matcher(html);
|
|
html = mScript.replaceAll("");
|
|
// 过滤style标签
|
|
Matcher mStyle = P_STYLE.matcher(html);
|
|
html = mStyle.replaceAll("");
|
|
// 过滤html标签
|
|
Matcher mHtml = P_HTML.matcher(html);
|
|
html = mHtml.replaceAll("");
|
|
// 过滤空格回车标签
|
|
Matcher mSpace = P_SPACE.matcher(html);
|
|
html = mSpace.replaceAll("");
|
|
return html;
|
|
}
|
|
|
|
/**
|
|
* 格式化HTML代码
|
|
*
|
|
* @param code html代码
|
|
* @param indentType 缩进类型,默认是空格
|
|
* @param indentCount 缩进数量
|
|
* @return
|
|
*/
|
|
public static String formatHtml(String code, String indentType, int indentCount) {
|
|
if (StringUtils.isBlank(code)) {
|
|
return null;
|
|
}
|
|
code = code.replaceAll(">\\s+<", "><");
|
|
String html = Jsoup.parseBodyFragment(code).body().html();
|
|
if (StringUtils.isBlank(html)) {
|
|
return null;
|
|
}
|
|
if (indentType == null) {
|
|
indentType = " ";
|
|
}
|
|
if (indentCount < 0) {
|
|
indentCount = 0;
|
|
}
|
|
String[] htmlArray = html.split("\n");
|
|
String result = "";
|
|
for (String htmlLine : htmlArray) {
|
|
int startIndex = htmlLine.indexOf("<");
|
|
String tab = "";
|
|
for (int i = 0; i < startIndex + indentCount; i++) {
|
|
tab += indentType;
|
|
}
|
|
htmlLine = tab + htmlLine.trim();
|
|
System.out.println(htmlLine);
|
|
result += htmlLine + "\n";
|
|
}
|
|
return result;
|
|
}
|
|
|
|
public static void main(String[] args) {
|
|
String code = "<div id=\"input_1\" class=\"layui-form-item \" data-id=\"input_1\" data-tag=\"input\" data-index=\"0\"> <label class=\"layui-form-label layui-form-required\" style=\"width: 110px;\"><span style=\"color:red;\">*</span>单行文本:</label> <div class=\"layui-input-block\" style=\"margin-left: 110px\"> <input name=\"input_1\" value=\"\" placeholder=\"请输入\" class=\"layui-input\" lay-vertype=\"tips\" lay-verify=\"required\" style=\"width:100%\"> </div></div>";
|
|
String result = formatHtml(code, "\t", 0);
|
|
System.out.println(result);
|
|
}
|
|
|
|
}
|