<?php

$ID = '$Id: searches.php 952 2008-03-23 21:35:38Z bb $';

require 'app.inc.php';
require 'appmenu.inc.php';

# ---- Configuration: adjust the settings below ----

# Path of the web server access log to scan
$todaylog = "/var/log/apache/drbeat.li/access.log";
$def_kb = 1024;    // Default number of kB to seek back from the end of the file

# Search engines to recognise in the referrer field of each log line.
# Each entry is
#     array(display name, PCRE matched against the referrer, query source)
# where the query source is either
#   - a list of query-string argument names that are tried in order
#     ('prev' gets special treatment as a Google image search), or
#   - the integer index of the capture group in the PCRE that holds the
#     search terms.
# Entries are tried from top to bottom and the first matching pattern wins,
# so more specific patterns must precede more general ones (see 'a9').
$engines = array(
    array('Google', '{\\.google\\..+/(search|imgres)\\?}i', array('q', 'as_q', 'prev')),
    array('Yahoo!', '{search\\.yahoo\\.com/search\\?}i', array('p')),
    // The dot is escaped so that it only matches a literal "." in the host name
    array('technorati', '{technorati\\.com/(tags?|search)/(.+)}i', 2),
    array('Altavista', '{altavista\\.}i', array('q')),
    array('a9', '{a9\\.com/-/search}i', array('q')),
    array('a9', '{a9\\.com/(.+)\\??}i', 1),
    array('Jux2', '{jux2\\.com/}i', array('q')),
    array('Wikipedia', '{wikipedia\\.org/wiki/(.+)}i', 1),
    array('search.ch', '{www\\.search\\.ch/search\\.html}i', array('q'))
);

# ---- End of configuration ----

/**
 * Split the query string of a URI into a name => value map.
 *
 * Names and values are returned exactly as they appear in the URI; no URL
 * decoding is applied, so callers decode the values they use.  When a name
 * occurs more than once, the last occurrence wins.
 *
 * @param string $uri  URI, with or without a "?query" part
 * @return array       argument name => raw value; an argument without "="
 *                     maps to the empty string.  Empty if there is no query.
 */
function queryargs($uri) {
    $args = array();

    // Limit 2: everything after the FIRST '?' is the query string, even if
    // it contains further (unencoded) '?' characters.
    $parts = explode('?', $uri, 2);
    if (sizeof($parts) < 2)
        return $args;

    foreach (explode('&', $parts[1]) as $a) {
        if ($a === '')
            continue;    // empty segment, e.g. "a=1&&b=2" or a trailing '&'

        // Limit 2 keeps any '=' characters that are part of the value
        $arg1 = explode('=', $a, 2);
        $args[$arg1[0]] = isset($arg1[1]) ? $arg1[1] : '';
    }
    return $args;
}

// error_reporting(1);

# Result list; each entry is
#     array(raw query, referrer URL, "at <time> from <ip>", requested URI)
$searches = array();

# Number of kB to read from the end of the log; -1 means the whole file.
# A missing or invalid "kb" request parameter selects the default.
$kb = (isset($_GET['kb']) && is_scalar($_GET['kb'])) ? intval($_GET['kb']) : 0;
if ($kb < 1 && $kb != -1)
    $kb = $def_kb;
$ofs = $kb * 1024;

# open and position the log file
$fd = fopen($todaylog, "r");

# If the log cannot be opened, fopen() has already raised a warning; the
# result list simply stays empty instead of failing on an invalid handle.
if ($fd !== false) {
    if ($kb != -1) {
        # seek backwards from EOF
        fseek($fd, 0, SEEK_END);
        if (ftell($fd) < $ofs)
            fseek($fd, 0, SEEK_SET);    // File is shorter than requested: read all of it
        else {
            fseek($fd, -$ofs, SEEK_END);
            fgets($fd);                 // Discard the (probably partial) first line
        }
    }

    # read the remainder; no length limit, so that long lines are not split
    # into pieces that would then be parsed as separate log entries
    while (($x = fgets($fd)) !== false) {

        // 64.68.82.204 - - [01/Mar/2004:07:26:34 +0100] "GET /robots.txt HTTP/1.0" rc size "referrer" "user agent"
        $fields = explode(" ", rtrim($x, "\r\n"));
        if (sizeof($fields) < 11)
            continue;                   // Too short to contain a referrer field

        $ip       = $fields[0];
        $datetime = $fields[3];
        $tz       = $fields[4];
        $log_uri  = $fields[6];
        $referrer = trim($fields[10], '"');    // drop the quote characters

        # find the first engine whose pattern matches the referrer
        $srch = null;
        $matches = array();
        foreach ($engines as $e)
            if (preg_match($e[1], $referrer, $matches)) {
                $srch = $e[2];
                break;
            }

        if ($srch === null)
            continue;

        $query = '';
        if (is_array($srch)) {
            # the search terms are in the first non-empty query argument
            $args = queryargs($referrer);
            foreach ($srch as $s) {
                if (!isset($args[$s]) || $args[$s] === '')
                    continue;
                $query = $args[$s];

                // Special treatment for google image searches: "prev" holds
                // the URL-encoded path and query of the originating search,
                // which becomes both the link target and the query source
                if ($s == 'prev') {
                    $imgq = urldecode($query);
                    $prevargs = queryargs($imgq);
                    $query = isset($prevargs['q']) ? $prevargs['q'] : '';
                    $host = explode('/imgres', $referrer);
                    $referrer = $host[0] . $imgq;
                }
                break;
            }
        }
        elseif (is_int($srch) && isset($matches[$srch]))
            # the search terms are a capture group of the engine pattern
            $query = $matches[$srch];

        // Compare against '' rather than testing truthiness, so that a
        // search for "0" is not dropped
        if ($query !== '') {
            // "[01/Mar/2004:07:26:34" "+0100]"  =>  "01/Mar/2004 07:26:34 +0100"
            $t = substr($datetime, 1, 11) . ' ' . substr($datetime, -8) . ' ' . substr($tz, 0, -1);
            $searches[] = array($query, $referrer, "at $t from $ip", $log_uri);
        }
    }
    fclose($fd);
}

# Render the page: a sentence naming the recognised engines, followed by one
# line per search found (search terms linked to the referrer, then the
# requested URI linked to itself).
$app = new Application("Referrers from search engines", $menu);

$app->head_add = "<meta name=\"robots\" content=\"noindex,nofollow\" />\n";
$app->header();

# An engine may be configured with several patterns; list each name once
$names = array();
foreach ($engines as $e)
    if (!in_array($e[0], $names, true))
        $names[] = $e[0];

echo "<p>Searches from ";
$last = sizeof($names) - 1;
foreach ($names as $i => $name) {
    echo $name;
    if ($i == $last - 1)
        echo ' and ';
    elseif ($i < $last - 1)
        echo ', ';
}
echo " in ";
if ($kb != -1)
    echo "the last {$kb}kB of ";
echo "my access log:</p>\n<p>";

if (sizeof($searches) == 0)
    echo "None.";

# Everything in $searches comes from the access log, i.e. from request data
# chosen by arbitrary clients.  Each value is therefore escaped for the HTML
# context it is written to, and a link is only emitted for URLs with an
# expected shape (which keeps e.g. "javascript:" URLs out of href).
foreach ($searches as $s) {
    $txt0  = htmlspecialchars(urldecode($s[0]), ENT_QUOTES);
    $uri1  = htmlspecialchars($s[1], ENT_QUOTES);
    $title = htmlspecialchars($s[2], ENT_QUOTES);
    $href3 = htmlspecialchars($s[3], ENT_QUOTES);
    $uri3  = htmlspecialchars(urldecode($s[3]), ENT_QUOTES);

    if (preg_match('{^https?://}i', $s[1]))
        echo "<a href=\"$uri1\" title=\"$title\">$txt0</a>";
    else
        echo "<span title=\"$title\">$txt0</span>";

    echo ": ";

    if (preg_match('{^(/|https?://)}i', $s[3]))
        echo "<a href=\"$href3\">$uri3</a>";
    else
        echo $uri3;

    echo "<br />\n";
}
echo "</p>\n";

$app->footer(1);

?>