最近百度动作挺大的,搜索算法频繁调整。刚才在分析本站百度收录情况的时候又发现百度的搜索结果链接竟然也变了,想必这么做是对付360恶意抓取百度的搜索结果的吧。没办法,百度变了,俺这收录分析工具也得跟着变一变了,总得将这奇长无比的链接转为正常的链接才行吧!研究了一下,发现还是可以转变的。
/**
 * Resolve the address a redirecting link (e.g. a Baidu result link) points to.
 *
 * Requests the headers of $url and, when the first response is an HTTP
 * redirect, returns the last Location header received. get_headers() follows
 * redirects by default, so the last Location is the end of the chain.
 *
 * The original $url is returned unchanged when:
 *  - it is not an http/https URL (no network request is made),
 *  - the header request fails,
 *  - the first response is not a 301/302/303/307/308 redirect,
 *  - no usable Location header is present.
 *
 * Note: PHP itself emits a warning when the request fails; the function
 * still falls back to returning $url in that case.
 *
 * @param string $url Link to resolve.
 *
 * @return string Redirect target, or $url when nothing could be resolved.
 */
function getRealUrl($url)
{
    // Only http(s) URLs can answer with a redirect; skip the request otherwise.
    $scheme = is_string($url) ? parse_url($url, PHP_URL_SCHEME) : null;
    if (!is_string($scheme) || !in_array(strtolower($scheme), ['http', 'https'], true)) {
        return $url;
    }

    $headers = get_headers($url, true);
    if ($headers === false || !isset($headers[0]) || !is_string($headers[0])) {
        return $url;
    }

    // Read the real status code instead of searching for "301" anywhere in the line.
    if (!preg_match('/^HTTP\/\S+\s+(\d{3})/', $headers[0], $match)
        || !in_array($match[1], ['301', '302', '303', '307', '308'], true)
    ) {
        return $url;
    }

    // Header names are case-insensitive; servers may send "location".
    $headers = array_change_key_case($headers, CASE_LOWER);
    if (!isset($headers['location'])) {
        return $url;
    }

    $location = $headers['location'];
    if (is_array($location)) {
        // Several redirects were followed: the last Location is the final hop.
        $location = end($location);
    }

    return (is_string($location) && $location !== '') ? $location : $url;
}
例如下面这个例子:
<?php

// Demo: resolve a Baidu search-result link to the page it redirects to.
header('Content-type:text/html;charset=utf-8');

/**
 * Resolve the address a redirecting link (e.g. a Baidu result link) points to.
 *
 * Requests the headers of $url and, when the first response is an HTTP
 * redirect, returns the last Location header received. get_headers() follows
 * redirects by default, so the last Location is the end of the chain.
 *
 * The original $url is returned unchanged when it is not an http/https URL,
 * when the header request fails, when the first response is not a
 * 301/302/303/307/308 redirect, or when no usable Location header is present.
 *
 * @param string $url Link to resolve.
 *
 * @return string Redirect target, or $url when nothing could be resolved.
 */
function getRealUrl($url)
{
    // Only http(s) URLs can answer with a redirect; skip the request otherwise.
    $scheme = is_string($url) ? parse_url($url, PHP_URL_SCHEME) : null;
    if (!is_string($scheme) || !in_array(strtolower($scheme), ['http', 'https'], true)) {
        return $url;
    }

    $headers = get_headers($url, true);
    if ($headers === false || !isset($headers[0]) || !is_string($headers[0])) {
        return $url;
    }

    // Read the real status code instead of searching for "301" anywhere in the line.
    if (!preg_match('/^HTTP\/\S+\s+(\d{3})/', $headers[0], $match)
        || !in_array($match[1], ['301', '302', '303', '307', '308'], true)
    ) {
        return $url;
    }

    // Header names are case-insensitive; servers may send "location".
    $headers = array_change_key_case($headers, CASE_LOWER);
    if (!isset($headers['location'])) {
        return $url;
    }

    $location = $headers['location'];
    if (is_array($location)) {
        // Several redirects were followed: the last Location is the final hop.
        $location = end($location);
    }

    return (is_string($location) && $location !== '') ? $location : $url;
}

$url = 'http://www.baidu.com/link?url=b5ea9243fe3a2e414f6dda3dff90ed9d97f285d7211bbcef34c69c98f58d61712209b5fe75dced9f58b81ccb78e5c5275abe581fc56ac4a8ac52f2f70a08425c953978cfd5fbb590d53c72f8cdfe9d08771f4d4a27cd8c0cb1615a58b4a4d264c5055b621047d3d3a905ceefdc5375e33aba3afeddfe8c26531e41c110f268f5f770e75923d01b3bbc11aeab3e82d0f7a18dae5de66d78a6a87b7d163859b87af9439a2f81bf086917d308a1dffba4df39ae561594ae211194b42ca201e570e6d69d790665cd58b903fbc95772983b2e72530dabe6bef294db0f8626664a97e5f69739f26789d912774b8cb951f42c9716c2d5c87f7ccc91753c0792c601e292c27e59adf9cc9cf0e428ca12f33095702ade810df1bdf3bb18fd67b833471523a7a86b839d18d8dbd841558cb8b91713952e84fee2e4abb69c5998be1bac2271743f6e05527d624dec34467d525b0298020d8a5136f19c80e43e8a2d882aebac5573358cc3c0659257bfcf546427d385fef66539a01e0c7f49996bb4437c';
$url = getRealUrl($url);

// The resolved URL comes from a remote server's Location header and the
// response is text/html, so escape it before printing.
echo '真实的url为:' . htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
文章想法参考自:http://enenba.com/?post=237