<?php
$ID = '$Id: searches.php 952 2008-03-23 21:35:38Z bb $';
require 'app.inc.php';
require 'appmenu.inc.php';
# Configure the settings below.
# Path of the access log to scan.
$todaylog = "/var/log/apache/drbeat.li/access.log";
$def_kb = 1024; // Number of kB to seek from the end of the file
# Search engines recognised in the referrer. Each entry is
#   array(display name, regex matched against the referrer, query source)
# where the query source is either
#   - an array of query-string argument names, tried in order, or
#   - an int: the number of the regex capture group holding the search terms.
# The first entry whose regex matches wins, so specific patterns must come
# before more general ones for the same site (see the two a9 entries).
# The argument name 'prev' gets special treatment as a Google image search.
$engines = array(
    array('Google', '{\\.google\\..+/(search|imgres)\\?}i', array('q', 'as_q', 'prev')),
    array('Yahoo!', '{search\\.yahoo\\.com/search\\?}i', array('p')),
    // The dot is escaped so that only "technorati.com" matches, not "technorati" + any character + "com".
    array('technorati', '{technorati\\.com/(tags?|search)/(.+)}i', 2),
    array('Altavista', '{altavista\\.}i', array('q')),
    array('a9', '{a9\\.com/-/search}i', array('q')),
    array('a9', '{a9\\.com/(.+)\\??}i', 1),
    array('Jux2', '{jux2\\.com/}i', array('q')),
    array('Wikipedia', '{wikipedia\\.org/wiki/(.+)}i', 1),
    array('search.ch', '{www\\.search\\.ch/search\\.html}i', array('q'))
);
# End of configuration.
/**
 * Split the query string of a URI into a name => value map.
 *
 * Names and values are returned exactly as they appear in the URI; nothing
 * is URL-decoded here. Everything after the first '?' is the query string,
 * so a value may itself contain '?' or '='. An argument without '=' maps to
 * the empty string, and empty segments (e.g. a trailing '&') are skipped.
 * If a name occurs more than once, the last occurrence wins.
 *
 * @param string $uri URI (e.g. a referrer) to inspect
 * @return array map of argument name => raw value; empty if there is no query string
 */
function queryargs($uri) {
    $args = array();
    // Limit 2: only the first '?' separates the path from the query string.
    $parts = explode('?', $uri, 2);
    if (count($parts) < 2)
        return $args;
    foreach (explode('&', $parts[1]) as $a) {
        if ($a === '')
            continue;
        // Limit 2: keep any further '=' as part of the value.
        $pair = explode('=', $a, 2);
        $args[$pair[0]] = isset($pair[1]) ? $pair[1] : '';
    }
    return $args;
}
// error_reporting(1);
# Matches found in the log; each entry is array(query, referrer, description, request URI).
$searches = array();
# ?kb=N selects how many kB to read from the end of the log; -1 means the whole file.
# A missing or non-scalar parameter falls back to the default instead of
# raising an undefined-index notice.
$kb = (isset($_GET['kb']) && is_scalar($_GET['kb'])) ? intval($_GET['kb']) : 0;
if ($kb < 1 && $kb != -1)
    $kb = $def_kb;
$ofs = $kb * 1024; // offset in bytes
# open and position the log file
$fd = fopen($todaylog, "r");
if ($fd === false) {
    // Stop with a clear message instead of passing a non-resource to
    // fseek()/fgets()/fclose() further down.
    die("Cannot open the access log.");
}
if ($kb != -1) {
    # seek backwards from EOF
    fseek($fd, 0, SEEK_END); // Go to EOF
    if (ftell($fd) < $ofs)
        fseek($fd, 0, SEEK_SET); // File is shorter than the window: back to top of file
    else {
        fseek($fd, -$ofs, SEEK_END);
        // Discard the partial line we landed in. No length limit, so the
        // whole remainder of that line is dropped however long it is.
        fgets($fd);
    }
}
# read the remainder
# fgets() is called without a length limit: a limit would split long lines
# and the tail would then be misparsed as a separate log entry.
while (($x = fgets($fd)) !== false) {
    // 64.68.82.204 - - [01/Mar/2004:07:26:34 +0100] "GET /robots.txt HTTP/1.0" rc size "referrer" "user agent"
    $fields = explode(" ", $x);
    if (count($fields) < 11)
        continue; // too few fields to contain a referrer
    $ip = $fields[0];
    $datetime = $fields[3];
    $tz = $fields[4];
    $log_uri = $fields[6];
    $referrer = substr($fields[10], 1, -1); // drop the quote characters

    // The referrer is supplied by the client and is later used as a link
    // target, so only accept http(s) URLs.
    if (!preg_match('{^https?://}i', $referrer))
        continue;

    // Find the first configured engine whose pattern matches the referrer.
    $srch = null;
    $matches = array();
    foreach ($engines as $e) {
        if (preg_match($e[1], $referrer, $matches)) {
            $srch = $e[2];
            break;
        }
    }
    if ($srch === null)
        continue;

    $query = null;
    if (is_array($srch)) {
        // The search terms are in the first non-empty query-string argument listed.
        $args = queryargs($referrer);
        foreach ($srch as $s) {
            if (empty($args[$s]))
                continue;
            $query = $args[$s];
            // Special treatment for google image searches: 'prev' holds the
            // URL-encoded path and query of the original search page.
            if ($s == 'prev') {
                $imgq = urldecode($query);
                $args = queryargs($imgq);
                $query = isset($args['q']) ? $args['q'] : null;
                $host = explode('/imgres', $referrer);
                $referrer = $host[0] . $imgq;
            }
            break;
        }
    }
    elseif (is_int($srch)) {
        // The search terms are in a capture group of the engine's regex.
        $query = isset($matches[$srch]) ? $matches[$srch] : null;
    }

    if ($query) {
        // "[01/Mar/2004:07:26:34" and "+0100]" become "01/Mar/2004 07:26:34 +0100"
        $t = substr($datetime, 1, 11) . ' ' . substr($datetime, -8) . ' ' . substr($tz, 0, -1);
        $searches[] = array($query, $referrer, "at $t from $ip", $log_uri);
    }
}
fclose($fd);
# Render the page.
$app = new Application("Referrers from search engines", $menu);
$app->head_add = "<meta name=\"robots\" content=\"noindex,nofollow\" />\n";
$app->header();

# List the engine names as "A, B and C"; an engine that is configured with
# several patterns is named only once.
$names = array();
foreach ($engines as $e) {
    if (!in_array($e[0], $names, true))
        $names[] = $e[0];
}
$last_name = array_pop($names);
echo "<p>Searches from ";
if ($names)
    echo implode(', ', $names) . ' and ';
echo $last_name;
echo " in ";
if ($kb != -1)
    echo "the last {$kb}kB of ";
echo "my access log:</p>\n<p>";

if (count($searches) == 0)
    echo "None.";
# Everything below originates from the access log and is therefore
# client-controlled: escape it for HTML text and attribute context.
foreach ($searches as $s) {
    $txt0 = htmlspecialchars(urldecode($s[0]), ENT_QUOTES);
    $uri1 = htmlspecialchars($s[1], ENT_QUOTES);
    $title = htmlspecialchars($s[2], ENT_QUOTES);
    $href3 = htmlspecialchars($s[3], ENT_QUOTES);
    $uri3 = htmlspecialchars(urldecode($s[3]), ENT_QUOTES);
    echo "<a href=\"$uri1\" title=\"$title\">$txt0</a>: <a href=\"$href3\">$uri3</a><br />\n";
}
echo "</p>\n";
$app->footer(1);
?>