搬运-蜘蛛php测试代码
可以通过 PHP 的 gethostbyaddr 和 gethostbyname 函数对来访 IP 做反向与正向 DNS 查询，从而验证自称搜索引擎蜘蛛的访问者是否真实。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
/**
 * Checks whether a client that identifies itself as a crawler really is that
 * crawler, using forward-confirmed reverse DNS.
 *
 * Steps:
 *  1. Reverse-resolve $ip to a hostname (PTR lookup).
 *  2. Forward-resolve that hostname and require $ip to be among its
 *     addresses, so a made-up PTR record alone is not enough.
 *  3. Require the hostname to belong to the crawler named in $agent: first
 *     against a table of well-known bots, then with a generic rule that the
 *     trailing domain of the hostname must be mentioned in the user agent.
 *
 * Side effect: once the reverse lookup has produced a hostname, one
 * tab-separated diagnostic line is printed (kept from the original code).
 *
 * Limitation: gethostbynamel() is documented to return IPv4 addresses only,
 * so an IPv6 client can presumably never be confirmed and yields false.
 *
 * @param string $ip    Client IP address to verify.
 * @param string $agent User-Agent string sent by the client.
 *
 * @return bool true when the IP is DNS-confirmed and its hostname matches the
 *              bot announced in $agent; false in every other case.
 */
function testbotip($ip, $agent)
{
    // Reject anything that is not a literal IP address before touching DNS;
    // gethostbyaddr() would only emit a warning and return false for it.
    if (filter_var($ip, FILTER_VALIDATE_IP) === false) {
        return false;
    }
    $ip = (string) $ip;
    $agent = (string) $agent;

    $hostname = gethostbyaddr($ip);
    // false: lookup rejected the input; unchanged IP: no PTR record exists.
    if ($hostname === false || $hostname === $ip) {
        return false;
    }

    // Use the list variant because a hostname may have several A records.
    $addresses = gethostbynamel($hostname);

    // NOTE(review): diagnostic output echoes the raw user agent; escape it
    // before using this function in an HTML response.
    print "Ip:$ip\tHostname: $hostname = $agent\t\t";

    if ($addresses === false || !in_array($ip, $addresses, true)) {
        // The hostname does not resolve back to the client: PTR is faked.
        return false;
    }

    $host = strtolower(rtrim($hostname, '.'));

    // Well-known crawlers: user-agent pattern => required hostname suffix.
    // Suffixes are anchored so that "msn.com.evil.example" cannot pass.
    $knownBots = [
        '/bing|msnbot/i' => '/(^|\.)msn\.com$/',
        '/google/i'      => '/(^|\.)google(bot)?\.com$/',
        '/yahoo/i'       => '/(^|\.)yahoo\.com$/',
        '/twittervir/i'  => '/(^|\.)twttr\.com$/',
    ];
    foreach ($knownBots as $agentPattern => $hostPattern) {
        if (preg_match($agentPattern, $agent) === 1 && preg_match($hostPattern, $host) === 1) {
            return true;
        }
    }

    // Generic test: well-behaved bots advertise a domain in their user agent
    // where they can be looked up; it has to match the reverse-DNS domain.
    // Labels may contain hyphens, so "evil-google.com" is not cut to "google.com".
    if (preg_match('/([a-z0-9_-]+\.[a-z0-9_-]+)($|\.uk$)/', $host, $matches) !== 1) {
        return false;
    }
    $domain = $matches[0];

    // Require the domain to appear as a whole name in the agent, so that
    // "oogle.com" is not found inside "google.com" and "example.com" is not
    // found inside "example.com.evil.example".
    $mention = '/(?<![a-z0-9_-])' . preg_quote($domain, '/') . '(?![a-z0-9_-]|\.[a-z0-9])/i';

    return preg_match($mention, $agent) === 1;
}