以下代码用于在采集页面时获取网页中的所有链接,并逐个循环输出:

// Fetch a page, parse it as HTML and print the href of every <a> element
// found inside <body>, one per line.

// Download the raw markup of the page to scan.
$html = file_get_contents('http://www.w3cmap.com');
if ($html === false || $html === '') {
    // file_get_contents() returns false on failure, and DOMDocument::loadHTML()
    // cannot parse an empty string, so stop here with a clear error.
    throw new RuntimeException('Unable to fetch http://www.w3cmap.com');
}

// Real-world HTML is rarely well-formed. Collect libxml's parse diagnostics
// internally instead of silencing every error with the @ operator, then
// restore whatever setting was active before.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$dom = new DOMDocument();
$loaded = $dom->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

if ($loaded === false) {
    throw new RuntimeException('Unable to parse the HTML of http://www.w3cmap.com');
}

// Grab all the links (<a> elements) inside the page body.
$xpath = new DOMXPath($dom);
$hrefs = $xpath->query('/html/body//a');

foreach ($hrefs as $href) {
    // getAttribute() yields an empty string for anchors without an href,
    // so such anchors still produce an (empty) output line.
    $url = $href->getAttribute('href');
    echo $url . '
';
}