/**
*
* @param s
* @return 获得网页标题
*/
public String getTitle(final String s)
{
String regex;
String title = "";
final List<String> list = new ArrayList<String>();
regex = "<title>.*?</title>";
final Pattern pa = Pattern.compile(regex, Pattern.CANON_EQ);
final Matcher ma = pa.matcher(s);
while (ma.find())
{
list.add(ma.group());
}
for (int i = 0; i < list.size(); i++)
{
title = title + list.get(i);
}
return outTag(title);
}
/**
*
* @param s
* @return 获得链接
*/
public List<String> getLink(final String s)
{
String regex;
final List<String> list = new ArrayList<String>();
regex = "<a[^>]*href=(\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))[^>]*>(.*?)</a>";
final Pattern pa = Pattern.compile(regex, Pattern.DOTALL);
final Matcher ma = pa.matcher(s);
while (ma.find())
{
list.add(ma.group());
}
return list;
}
/**
*
* @param s
* @return 获得脚本代码
*/
public List<String> getScript(final String s)
{
String regex;
final List<String> list = new ArrayList<String>();
regex = "<script.*?</script>";
final Pattern pa = Pattern.compile(regex, Pattern.DOTALL);
final Matcher ma = pa.matcher(s);
while (ma.find())
{
list.add(ma.group());
}
return list;
}
/**
*
* @param s
* @return 获得CSS
*/
public List<String> getCSS(final String s)
{
String regex;
final List<String> list = new ArrayList<String>();
regex = "<style.*?</style>";
final Pattern pa = Pattern.compile(regex, Pattern.DOTALL);
final Matcher ma = pa.matcher(s);
while (ma.find())
{
list.add(ma.group());
}
return list;
}