/* STEP 1: detect whether the visitor arrives from a Google results page and
 * capture the search query carried by the referrer.
 *
 * Reads: $_SERVER["HTTP_REFERER"] (optional and client-supplied, so untrusted).
 * Sets:
 *   $isGoogleReferer      bool   - true when the referrer is a www.google.* or
 *                                  images.google.* URL (http or https)
 *   $googleQuery          string - raw, still URL-encoded value of the "q"
 *                                  parameter; '' when there is none
 *   $googleQuerySanitize,
 *   $google               string - decoded query in which every character
 *                                  outside the allowed set is replaced by a
 *                                  space; $google is '' when there is no query
 */
$referer = isset($_SERVER["HTTP_REFERER"]) ? $_SERVER["HTTP_REFERER"] : '';

/* does the visitor come from Google? */
$isGoogleReferer = (preg_match(
    '!^https?://(www|images)\.google\.[a-z]{2,3}(\.[a-z]{2,3})?/!i',
    $referer
) === 1);

/* if yes, we catch the query (param. q in the URL). The name must directly
   follow "?", "&" or "#": an unanchored "q=" would also match inside "oq=",
   "aq=" or "as_q=" and capture the wrong parameter. */
if ($isGoogleReferer && preg_match('![?&#]q=([^&#]*)!i', $referer, $matches)) {
    $googleQuery = $matches[1];
} else {
    $googleQuery = null;
}

/* if the query was captured, we build a readable version of it */
if ($googleQuery) {
    // "+" (the form encoding of a space) is outside the allowed set, so it is
    // turned into a space here as well; no separate replacement is needed.
    $googleQuerySanitize = preg_replace(
        '![^a-z0-9éèàùçÇÉÀÈÙêÊ\s]!i',
        ' ',
        rawurldecode($googleQuery)
    );
    $google = $googleQuerySanitize;
} else {
    // Always leave both variables defined as strings.
    $googleQuery = '';
    $google = '';
}
// END STEP 1 (detection of Google if it's the referer)
// credit for this part: most of the code is a
// simpler rewrite of a snippet by Romain Follet:
// http://www.cooldev.net/article-93-ya-pas-que-google-dans-la-vie.html
// thanks to him for this piece of code :D
/* STEP 1 BIS: detection of Google by the client IP address or by its
 * reverse-DNS host name.
 *
 * Reads: $_SERVER['REMOTE_ADDR'].
 * Sets:
 *   $ip           string       - client address, '' when unavailable
 *   $host         string|false - result of gethostbyaddr(); false when no
 *                                lookup could be made
 *   $GoogleByDNS  bool         - host looks like crawl-66-249-x-y.googlebot.com
 *   $GoogleByHost bool         - same value as $GoogleByDNS (older name)
 *   $GoogleByIP   bool         - address is inside 66.249.x.y
 */
$ip = isset($_SERVER['REMOTE_ADDR']) ? strval($_SERVER['REMOTE_ADDR']) : ''; // conversion to string
$host = false;
$GoogleByDNS = false;  // this is the flag tested by the redirection step
$GoogleByHost = false;
$GoogleByIP = false;   // default values

// Only ask the resolver about a well-formed address: gethostbyaddr() raises a
// warning on anything else.
if (filter_var($ip, FILTER_VALIDATE_IP) !== false) {
    $host = gethostbyaddr($ip);
}

// Both patterns are anchored so that look-alikes such as
// "crawl-66-249-1-1.googlebot.com.example.org" or "166.249.1.1" do not match.
if (is_string($host) && preg_match('/^crawl-66-249-\d{1,3}-\d{1,3}\.googlebot\.com\.?$/', $host)) {
    $GoogleByDNS = true;
    $GoogleByHost = true;
}
if (preg_match('/^66\.249\.\d{1,3}\.\d{1,3}$/', $ip)) {
    $GoogleByIP = true;
}
// END STEP 1 BIS
// credits : http://www.actulab.com/identification-des-robots.php
// and : http://www.robots.darkseoteam.com/
// ----- ----- ----- ----- ----- ----- ----- ----- ----- -----
/* STEP 2: defining the redirection URLs.
 *
 * Reads: $googleQuery (the still URL-encoded search query; treated as '' when
 *        it has not been set).
 * Sets:  $URLsForGooglebotImage, $URLsForGooglebot,
 *        $sameQueryButInYauba, $sameQueryButInYahoo, $sameQueryButInIxquick,
 *        $UrlsForThoseComingFromGoogle, $UrlsForThoseComingNotFromGoogle.
 * Each table is later sampled with array_rand(), so listing a URL several
 * times raises its probability.
 */
if (!isset($googleQuery)) {
    $googleQuery = '';
}

/* URLs for Googlebot-Image */
$URLsForGooglebotImage = array(
    "http://www.someurl.net",
    "http://www.someurl.net",
    "http://www.someurl.net"
);

/* URLs for Googlebot */
$URLsForGooglebot = array(
    "http://www.someurl.net",
    "http://www.someurl.net",
    "http://www.someurl.net"
);

/* building the same query in other search engines */
$sameQueryButInYauba = "http://www.yauba.com/?q={$googleQuery}&target=all";
$sameQueryButInYahoo = "http://search.yahoo.com/search?p={$googleQuery}";
// Kept on one line on purpose: a line break inside the literal would end up in
// the Location header, and header() refuses values containing a newline.
$sameQueryButInIxquick = "http://ixquick.com/do/metasearch.pl?query={$googleQuery}&cat=web&pl=ff&language=english";

/* URLs to which people coming from Google will be redirected.
   Ten slots in total: Yauba 50%, Yahoo 30%, Ixquick 20%. */
$UrlsForThoseComingFromGoogle = array_merge(
    array_fill(0, 5, $sameQueryButInYauba),
    array_fill(0, 3, $sameQueryButInYahoo),
    array_fill(0, 2, $sameQueryButInIxquick)
);

/* URLs for the other visitors */
$UrlsForThoseComingNotFromGoogle = array(
    "http://www.someurl.net",
    "http://www.someurl.net",
    "http://www.someurl.net"
);
// END STEP 2 (defining redirection URLs)
// ----- ----- ----- ----- ----- ----- ----- ----- ----- -----
/* STEP 3: detection of the user-agent.
 *
 * Reads: $_SERVER["HTTP_USER_AGENT"] (optional; some clients send none).
 * Sets:  $IsGooglebotImage and $IsGooglebot. Despite their names these hold the
 *        raw strpos() result: an integer offset (possibly 0) when the token is
 *        present, or false when it is absent, so they must be compared with
 *        "!== false" rather than tested for truthiness.
 */
$userAgent = isset($_SERVER["HTTP_USER_AGENT"]) ? (string) $_SERVER["HTTP_USER_AGENT"] : '';

/* looking for "Googlebot-Image" or "Googlebot" in the signature of the UA */
$IsGooglebotImage = strpos($userAgent, "Googlebot-Image");
$IsGooglebot = strpos($userAgent, "Googlebot");
// END STEP 3
// ----- ----- ----- ----- ----- ----- ----- ----- ----- -----
/* STEP 4: redirection.
 *
 * Reads: $GoogleByDNS, $GoogleByIP, $IsGooglebot, $IsGooglebotImage,
 *        $isGoogleReferer, $googleQuery and the URL tables
 *        $URLsForGooglebot, $URLsForGooglebotImage,
 *        $UrlsForThoseComingFromGoogle, $UrlsForThoseComingNotFromGoogle.
 * Effect: picks one URL at random from the table matching the visitor, sends a
 *         302 "Location" header to it, additionally prints a small HTML page
 *         pointing at the same URL when the visitor was identified as Google,
 *         then terminates the script.
 */

// An unset detection flag is read as "not detected" instead of raising a warning.
$detectedByNetwork = !empty($GoogleByDNS) || !empty($GoogleByIP);

/* if the visitor is Googlebot or Googlebot-Image */
if ($detectedByNetwork || $IsGooglebot !== false) {
    if ($detectedByNetwork) {
        /* identified by IP or DNS: regular Googlebot table */
        $candidateUrls = $URLsForGooglebot;
    } elseif ($IsGooglebotImage !== false) {
        /* the UA is Googlebot-Image */
        $candidateUrls = $URLsForGooglebotImage;
    } else {
        /* the UA is Googlebot (normal) */
        $candidateUrls = $URLsForGooglebot;
    }

    /* choosing a URL randomly. Every branch above feeds the same variable, so
       the header and the page below never see an empty destination. */
    $indice = array_rand($candidateUrls);
    $finalDestination = $candidateUrls[$indice];

    /* final operation in case of Google visiting the page */
    header("Location: " . $finalDestination, true, 302);

    // The URL is placed inside HTML attributes, so it is escaped for that context.
    $finalDestinationHtml = htmlspecialchars($finalDestination, ENT_QUOTES, 'ISO-8859-1');
    echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="mis">
<head>
<title>Content</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<meta http-equiv="refresh" content="0;URL='.$finalDestinationHtml.'" />
</head>
<body>
<p><a href="'.$finalDestinationHtml.'" rel="me">my website</a>.</p>
</body>
</html>
';
} else {
    /* the visitor is not Googlebot */
    if ($isGoogleReferer && $googleQuery) {
        /* the visitor comes from Google with a search query */
        $candidateUrls = $UrlsForThoseComingFromGoogle;
    } else {
        /* not from Google */
        $candidateUrls = $UrlsForThoseComingNotFromGoogle;
    }
    $indice = array_rand($candidateUrls);
    $finalDestination = $candidateUrls[$indice];

    /* final operation: redirecting */
    header("Location: " . $finalDestination, true, 302);
}
// END STEP 4 (redirection)
// ----- ----- ----- ----- ----- ----- ----- ----- ----- -----
exit;
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="fr">
<head>
<title>First name NAME</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
</head>
<body>
<p><a href="http://wwww.mywebsite.net" rel="me">my web site</a>.</p>
</body>
</html>
Get the code for Googlebot cloaking by IP addresses, DNS hosts and user-agent. (Format: .txt, same content as the lines above.)
Written by Mathias Poujol-Rost based on a snippet by Romain Follet.
Thanks : cloaking tutorial, list of IP addresses and DNS hosts of Google.
2009 - Mathias Poujol-Rost - Feel free to use this script :D (licence CC BY).