Sniffing and redirecting Google or its visitors

Original page (in French).

The PHP script to cloak Googlebot and Googlebot-Image

/* STEP 1 : detect whether the visitor comes from Google + catch the query */

/**
 * Tells whether a referer URL belongs to Google web search or image search.
 *
 * Accepts both http:// and https:// referers on the "www" and "images"
 * hosts of any google.<tld> or google.<sld>.<tld> domain.
 *
 * @param string $referer Raw value of the Referer request header ('' if absent).
 * @return bool True when the referer is a Google search URL.
 */
function isGoogleRefererUrl($referer)
{
    return preg_match(
        '!^https?://(www|images)\.google\.[a-z]{2,3}(\.[a-z]{2,3})?/!i',
        $referer
    ) === 1;
}

/**
 * Extracts the (still URL-encoded) value of the "q" parameter of a referer.
 *
 * The parameter name must directly follow "?", "&" or "#", so that
 * look-alike parameters such as "oq=" or "aq=" are not mistaken for "q=".
 *
 * @param string $referer Raw value of the Referer request header.
 * @return string The encoded query, or '' when no "q" parameter is present.
 */
function extractGoogleQuery($referer)
{
    if (preg_match('![?&#]q=([^&#]*)!i', $referer, $matches)) {
        return $matches[1];
    }
    return '';
}

/* does the visitor come from Google ? (a missing Referer header counts as "no") */
$refererUrl = isset($_SERVER["HTTP_REFERER"]) ? strval($_SERVER["HTTP_REFERER"]) : '';
$isGoogleReferer = isGoogleRefererUrl($refererUrl);

/* if yes, we catch its query (param. q in the URL); '' means "nothing captured" */
$googleQuery = $isGoogleReferer ? extractGoogleQuery($refererUrl) : '';

/* if the query was captured, we also build a human-readable version of it:
   decoded, with every character outside the allowed set replaced by a space.
   Both variables are always defined so later code can read them safely. */
$googleQuerySanitize = '';
$google = '';
if ($googleQuery !== '') {
    $googleQuerySanitize = preg_replace(
        '![^a-z0-9éèàùçÇÉÀÈÙêÊ\s]!i',
        ' ',
        rawurldecode($googleQuery)
    );
    $google = str_replace("+", " ", $googleQuerySanitize);
}
// END STEP 1 (detection of Google if it's the referer) 
// credit for this part: most of the code is a
// simpler rewrite of a snippet by Romain Follet:
// http://www.cooldev.net/article-93-ya-pas-que-google-dans-la-vie.html 
// thanks to him for this piece of code :D

/* STEP 1 BIS : detection of Google by the IP or DNS host */

/**
 * Tells whether an IPv4 address lies in the 66.249.x.x range.
 *
 * The pattern is anchored on both ends so that addresses which merely
 * contain the range (e.g. "166.249.1.1") are rejected.
 *
 * @param string $ip Dotted-quad address of the client.
 * @return bool
 */
function isGooglebotIp($ip)
{
    return preg_match('/^66\.249\.\d{1,3}\.\d{1,3}\z/', $ip) === 1;
}

/**
 * Tells whether a reverse-DNS name has the crawl-66-249-x-x.googlebot.com form.
 *
 * The pattern is anchored so that a name that only starts with or embeds
 * the expected host (e.g. "...googlebot.com.evil.example") is rejected.
 *
 * @param string|false $host Result of gethostbyaddr(); false is treated as "no".
 * @return bool
 */
function isGooglebotHost($host)
{
    return is_string($host)
        && preg_match('/^crawl-66-249-\d{1,3}-\d{1,3}\.googlebot\.com\z/i', $host) === 1;
}

// REMOTE_ADDR can be missing (e.g. command-line run): fall back to ''
$ip = isset($_SERVER['REMOTE_ADDR']) ? strval($_SERVER['REMOTE_ADDR']) : '';

// only ask the resolver about syntactically valid addresses;
// gethostbyaddr() warns and returns false on malformed input
$host = (filter_var($ip, FILTER_VALIDATE_IP) !== false) ? gethostbyaddr($ip) : false;

// $GoogleByDNS is the flag read by the redirection step; it is always
// defined here so that step never reads an undefined variable
$GoogleByDNS = isGooglebotHost($host);
$GoogleByHost = $GoogleByDNS; // former name of the same flag, kept as an alias
$GoogleByIP = isGooglebotIp($ip);
// END STEP 1 BIS
// credits : http://www.actulab.com/identification-des-robots.php
// and : http://www.robots.darkseoteam.com/
// ----- ----- ----- ----- ----- ----- ----- ----- ----- -----
 
 
/* STEP 2 : defining the redirection URLs */
/* URLs for Googlebot-Image */
$URLsForGooglebotImage = array(
    "http://www.someurl.net",
    "http://www.someurl.net",
    "http://www.someurl.net"
);

/* URLs for Googlebot */
$URLsForGooglebot = array(
    "http://www.someurl.net",
    "http://www.someurl.net",
    "http://www.someurl.net"
);

/* building the same query in other search engines */
// The query comes from the Referer header, i.e. from the visitor: decode it
// and encode it again so that only safe characters end up in the redirect URL.
$encodedGoogleQuery = isset($googleQuery)
    ? rawurlencode(urldecode(strval($googleQuery)))
    : '';

$sameQueryButInYauba = "http://www.yauba.com/?q={$encodedGoogleQuery}&target=all";
$sameQueryButInYahoo = "http://search.yahoo.com/search?p={$encodedGoogleQuery}";
// kept on a single logical line: a line break inside the literal would put a
// newline in the URL, and header() refuses values containing one
$sameQueryButInIxquick = "http://ixquick.com/do/metasearch.pl?query="
    . "{$encodedGoogleQuery}&cat=web&pl=ff&language=english";

/* URLs to which people coming from Google will be redirected */
// one URL is drawn at random from this list, so the number of copies of each
// engine sets its share: Yauba 50%, Yahoo 30%, Ixquick 20%
$UrlsForThoseComingFromGoogle = array_merge(
    array_fill(0, 5, $sameQueryButInYauba),
    array_fill(0, 3, $sameQueryButInYahoo),
    array_fill(0, 2, $sameQueryButInIxquick)
);

/* URLs for the other visitors */
$UrlsForThoseComingNotFromGoogle = array(
    "http://www.someurl.net",
    "http://www.someurl.net",
    "http://www.someurl.net"
);
// END STEP 2 (defining redirection URLs)
// ----- ----- ----- ----- ----- ----- ----- ----- ----- -----


/* STEP 3 : detection of the user-agent */
/* looking for "Googlebot-Image" or "Googlebot" in the signature of the UA */
// The User-Agent header is optional: treat a missing one as an empty string
// instead of reading an undefined index.
$userAgent = isset($_SERVER["HTTP_USER_AGENT"]) ? strval($_SERVER["HTTP_USER_AGENT"]) : '';

// Both flags hold the strpos() result: an integer offset (possibly 0) when the
// token is found, false otherwise - so they must be compared with !== false.
// "Googlebot-Image" contains "Googlebot", so $IsGooglebot is also set for it.
$IsGooglebotImage = strpos($userAgent, "Googlebot-Image");
$IsGooglebot = strpos($userAgent, "Googlebot");
// END STEP 3
// ----- ----- ----- ----- ----- ----- ----- ----- ----- -----


/* STEP 4 : redirection */
/*
 * Picks one destination at random from the list matching the visitor, sends a
 * 302 redirect to it and stops the script. Every branch stores its choice in
 * $finalDestination, the variable the header and the HTML fallback read.
 *
 * The detection flags are read through empty()/isset() so that a flag left
 * undefined by an earlier step counts as "not detected" rather than raising
 * a warning (or, for the strpos()-style flags, counting as a match).
 */
$visitorIsGoogleByNetwork = !empty($GoogleByIP) || !empty($GoogleByDNS);
$visitorHasGooglebotUA = isset($IsGooglebot) && $IsGooglebot !== false;
$visitorHasGooglebotImageUA = isset($IsGooglebotImage) && $IsGooglebotImage !== false;

/* if the visitor is Googlebot or Googlebot-Image */
if ($visitorIsGoogleByNetwork || $visitorHasGooglebotUA) {
    if ($visitorIsGoogleByNetwork) {
        /* recognised by IP or DNS host: choosing a URL randomly */
        $indice = array_rand($URLsForGooglebot);
        $finalDestination = $URLsForGooglebot[$indice];
    } elseif ($visitorHasGooglebotImageUA) {
        /* the UA is Googlebot-Image */
        $indice = array_rand($URLsForGooglebotImage);
        $finalDestination = $URLsForGooglebotImage[$indice];
    } else {
        /* the UA is Googlebot (normal): choosing one URL randomly */
        $indice = array_rand($URLsForGooglebot);
        $finalDestination = $URLsForGooglebot[$indice];
    }

    /* final operation in case of Google visiting the page */
    header("Location: " . $finalDestination, true, 302);

    // HTML fallback (meta refresh + link); the URL is escaped because it is
    // placed inside attribute values
    $finalDestinationHtml = htmlspecialchars($finalDestination, ENT_QUOTES, 'ISO-8859-1');
    echo '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">

<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="mis">
<head>
    <title>Content</title>
    <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
     <meta http-equiv="refresh" content="0;URL='.$finalDestinationHtml.'" />

</head>
<body>
<p><a href="'.$finalDestinationHtml.'" rel="me">my website</a>.</p>

</body>
</html>

';
} else {
    /* the visitor is not Googlebot */
    if (!empty($isGoogleReferer) && !empty($googleQuery)) {
        /* the visitor comes from Google with a query */
        $indice = array_rand($UrlsForThoseComingFromGoogle);
        $finalDestination = $UrlsForThoseComingFromGoogle[$indice];
    } else {
        /* not from Google */
        $indice = array_rand($UrlsForThoseComingNotFromGoogle);
        $finalDestination = $UrlsForThoseComingNotFromGoogle[$indice];
    }

    /* final operation : redirecting */
    header("Location: " . $finalDestination, true, 302);
}
// END STEP 4 (redirection)
// ----- ----- ----- ----- ----- ----- ----- ----- ----- -----
// nothing below this point is ever executed or sent
exit;

?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">

<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="fr">
<head>
    <title>First name NAME</title>
    <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />

</head>
<body>
<p><a href="http://www.mywebsite.net" rel="me">my web site</a>.</p>
</body>

</html>

Download the script

Get the code for cloaking Googlebots by IP addresses, DNS hosts and user agent. (Format: .txt, same content as the lines above.)

Credits of this script

Written by Mathias Poujol-Rost based on a snippet by Romain Follet.

Thanks : cloaking tutorial, list of IP addresses and DNS hosts of Google.

2009 - Mathias Poujol-Rost - Feel free to use this script :D (licence CC BY).