// Page to download; the script lists the links found in its HTML.
$request_url = 'https://www.google.es/search?q=Barcelona';

// URL pattern: an http/https/ftp/ftps scheme, a host made of letters, digits,
// hyphens and dots that ends in a 2-3 letter suffix, and an optional path.
$reg_exUrl = '/(http|https|ftp|ftps)\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(\/\S*)?/';
/**
 * Extract the trailing "name.suffix" portion of a URL's host.
 *
 * The host is taken from parse_url() and matched, case-insensitively, against
 * a pattern anchored at the end of the host: a label of at least two
 * characters (letters, digits, hyphens), a dot, then 2-6 letters and/or dots.
 * Hosts whose only candidate label is a single character therefore do not match.
 *
 * @param string $url URL to inspect.
 * @return string|false The matched domain (e.g. "google.es" for
 *                      "https://www.google.es/"), or false when the URL has
 *                      no host or the host does not match the pattern.
 */
function get_domain($url)
{
    $parts = parse_url($url);

    // No host component: the pattern cannot match an empty string.
    if (!isset($parts['host'])) {
        return false;
    }

    $pattern = '/(?P<domain>[a-z0-9][a-z0-9\-]{1,63}\.[a-z\.]{2,6})$/i';
    if (!preg_match($pattern, $parts['host'], $found)) {
        return false;
    }

    return $found['domain'];
}
// Download the page at $request_url into a string.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $request_url); // The URL to get links from
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // Return the response body instead of printing it
$result = curl_exec($ch);
if ($result === false) {
    // curl_exec() returns false on failure; surface the reason instead of
    // silently scanning a non-string for links.
    trigger_error('Request to ' . $request_url . ' failed: ' . curl_error($ch), E_USER_WARNING);
}
curl_close($ch);

// Markup to scan: the response body, or an empty string when the request failed.
$html = ($result === false) ? '' : $result;

// Capture the href value of every anchor that uses a double-quoted href.
$regex = '|<a.*?href="(.*?)"|';
preg_match_all($regex, $html, $parts);

// $title holds preg_match()'s return value (1 when a title="..." attribute
// was found, 0 otherwise); the captured text, if any, is in $match[1].
$title = preg_match('/title="(.+)">/', $html, $match);

$links = isset($parts[1]) ? $parts[1] : [];
asort($links);

foreach ($links as $link) {
    // strpos() returns false when the needle is absent and 0 when it is at the
    // very start, so both must be tested with strict comparisons.
    $pos = strpos($link, '://');
    if ($pos === false || $pos === 0) {
        continue; // no scheme separator preceded by a scheme: not an absolute URL
    }

    $exclude = strpos($link, 'google'); // remove Google's own results
    if ($exclude !== false) {
        continue;
    }

    // Drop anything in front of the first "http" (e.g. a redirect prefix);
    // links without "http" are kept whole.
    $posini = strpos($link, 'http');
    if ($posini !== false) {
        $link = substr($link, $posini);
    }

    $domain = get_domain($link);
    if ($domain === false) {
        $domain = ''; // same output as concatenating false
    }

    // The href text comes from a remote page, so escape it before echoing it
    // into a single-quoted attribute. double_encode is off because the value
    // was lifted from HTML and may already contain entities such as &amp;.
    $safe_link = htmlspecialchars($link, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    $safe_domain = htmlspecialchars($domain, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);

    echo "<a href='" . $safe_link . "'>" . $safe_domain . "</a> -> " . $safe_link . "<br>";
}
Sample:
Sources: