/home/a/aquarion/sites/www.aquarionics.com/epistula/chapters/logging.inc.php
All my code (that is, anything not in the "Others" list on the right) is BSD licensed.
You can also view this page as text/plain or colour-coded source
<?PHP
/*******************************************************************************
Chapter - Logging
********************************************************************************
Display a breakdown of site statistics. Depends on either mod_sql
(log to an SQL database) or a work-alike (see contrib/apachelog2sql.pl)
$Id: logging.inc.php,v 1.3 2004/08/17 19:59:35 aquarion Exp $
$log$
*******************************************************************************/
/*if (! $user = validate_user($_SERVER['PHP_AUTH_USER'], $_SERVER['PHP_AUTH_PW'], "sysadmin")){ // Validate User is a hidden function until the user system is written :-)
// Bad or no username/password.
// Send HTTP 401 error to make the
// browser prompt the user.
header("WWW-Authenticate: " .
"Basic realm=Protected Page: " .
"Enter your username and password " .
"for access.");
header("HTTP/1.0 401 Unauthorized");
// Display message if user cancels dialog
$page->content .= "<h2>Authorization Failed</h2>"
."<p>Without a valid username and password,"
." access to this page cannot be granted."
." please click 'reload' and enter a"
." username and password when prompted."
."</p>";
#die();
} else {*/
// Page-level setup for the logging chapter.
// NOTE(review): $caching is only assigned here; presumably the surrounding
// framework reads it to decide whether to cache the page - confirm.
$caching = false;
// Handle for the Apache log database. The main "epistula" database was used
// at some point instead:
#$log = dbquickconnect("epistula");
$log = dbquickconnect("apachelogs");
$page->title="Stats for ".$wanted[1];
$sitelink = "/logging/".$wanted[1];
// Per-site access table, e.g. "www.example.com" -> "access_www_example_com".
// $site is interpolated unquoted into SQL as a table name further down, so
// every character that is not valid in a plain identifier (not only dots) is
// mapped to an underscore. Dotted host names yield the same table name as
// before.
$site = "access_".preg_replace("/[^A-Za-z0-9_]/","_",$wanted[1]);
/**
 * Add one result row to a keyed group of referrers (helper for refforpage()).
 *
 * The first row seen for a key creates the group; later rows only add to its
 * count and to its list of individual referring URLs.
 *
 * @param array  $groups Group table, modified in place.
 * @param string $key    Unique key of the group inside $groups.
 * @param string $type   "livejournal", "search" or "searchexcp".
 * @param string $label  Human-readable name of the group (user name, search terms...).
 * @param array  $row    Query row with 'referer' and 'referers' (hit count).
 * @param string $found  URL of the page the visitors arrived at.
 */
function _refforpage_tally(&$groups, $key, $type, $label, $row, $found){
    if (!isset($groups[$key])){
        $groups[$key] = array(
            "count" => 0,
            "type" => $type,
            "label" => $label,
            "link" => $row['referer'],
            "found" => $found,
            "links" => array()
        );
    }
    $groups[$key]["count"] += (int)$row['referers'];
    $groups[$key]["links"][] = $row['referer'];
}

/**
 * List where the visitors of one page came from.
 *
 * Reads every external referer recorded for $request from the per-site access
 * table ($site), groups them into LiveJournal friends pages, search queries,
 * known search "exceptions" (image searches etc.) and plain referrals, and
 * renders one HTML fragment per group.
 *
 * @param string $request Request URI, matched exactly against request_uri.
 * @param mixed  $id      Unused; kept so the signature stays parallel to referers().
 * @param mixed  $since   Lower bound for time_stamp (callers in this file pass a
 *                        Unix timestamp), or false for no bound.
 * @return array List of HTML strings, one per referrer group.
 */
function refforpage($request, $id=false, $since = false){
    global $site;
    global $log;
    global $wanted;
    global $_EP;

    // $request comes straight from the query string, so it is escaped before
    // being placed inside the SQL string literal.
    $query = "select count(referer) as referers, referer from $site"
        ." where referer not like '".$_EP['url']."%'"
        ." and referer != \"-\""
        ." and request_uri = \"".mysql_real_escape_string($request)."\"";
    if ($since){
        $query .= " and time_stamp > ".(int)$since;
    }
    $query .= " group by referer order by referers desc;";
    $r = safequery($query,true,$log);

    $search = array(
        array('name' => "Search", 'match' => "(otmpl|qry|va|word|srchKey|keyword|term|MT|K|QRY|key|searchfor|userQuery|q|p|query|search|Keywords|MetaTopic|ask)\=(cache\:.*?\+)?(.*?)(&.*|$)", 'i' => 3),
    );
    $searchExceptions = array(
        array('regex' => "^http://images\.google", 'title' => 'something though images.google'),
        array('regex' => "^http://.*\.google.*/im", 'title' => 'something though images.google'),
        array('regex' => "^http://www\.ask\.co\.uk/image", 'title' => 'something though ask.co.uk\'s Images'),
        array('regex' => "^http://webferret\.search.com/click", 'title' => 'Webferrer'),
        // Was "www.\.overture", which required an extra character after "www".
        array('regex' => "^http://www\.overture\.com", 'title' => 'Overture')
    );
    $livejournalURLs = array(
        "^http://www\.livejournal\.com/users/(.*)/friends",
        "^http://www\.livejournal\.com/~(.*)/friends",
        "^http://(.*)\.livejournal\.com/friends"
    );

    // The query does not select request_uri (every row shares $request), so
    // the landing URL is built from $request itself.
    $found = "http://".$wanted[1].$request;

    // Group keys carry a type prefix so that a LiveJournal user name, a search
    // term and the numeric keys of plain referrals can never collide.
    $referers = array();
    while($row = mysql_fetch_array($r)){
        $matched = false;
        foreach($livejournalURLs as $pattern){
            if (preg_match("#".$pattern."#",$row['referer'], $matches)){
                $terms = strtolower(urldecode($matches[1]));
                _refforpage_tally($referers, "lj:".$terms, "livejournal", $terms, $row, $found);
                $matched = true;
            }
        }
        foreach($search as $engine){
            if (preg_match("#".$engine['match']."#",$row['referer'], $matches)){
                $terms = strtolower(urldecode($matches[$engine['i']]));
                _refforpage_tally($referers, "search:".$terms, "search", $terms, $row, $found);
                $matched = true;
            }
        }
        foreach($searchExceptions as $engine){
            if (preg_match("#".$engine['regex']."#",$row['referer'])){
                // Exceptions now keep their list of links too; the renderer
                // iterates it for both search types.
                _refforpage_tally($referers, "excp:".$engine['title'], "searchexcp", $engine['title'], $row, $found);
                $matched = true;
            }
        }
        if (!$matched){
            $referers[] = array(
                "type" => "referal",
                "count" => (int)$row['referers'],
                "link" => $row['referer'],
                "found" => $found
            );
        }
    }

    // Referers and search terms are supplied by remote clients, so everything
    // is HTML-escaped on output.
    $items = array();
    foreach($referers as $entry){
        $people = ($entry['count'] == 1) ? "person" : "people";
        $from = htmlspecialchars($entry['link'], ENT_QUOTES);
        $to = htmlspecialchars($entry['found'], ENT_QUOTES);
        switch ($entry['type']){
            case "livejournal":
                $items[] = $entry['count']." $people came from <a href=\"".$from."\">".htmlspecialchars($entry['label'], ENT_QUOTES)."'s Livejournal</a> to <a href=\"".$to."\">here</a>";
                break;
            case "search":
            case "searchexcp":
                // Element id for the collapsible list of individual referrers.
                $listId = preg_replace("/\W/","",$entry['label']);
                $text = "<ul style=\"display: none;\" id=\"".$listId."\">\n";
                foreach($entry['links'] as $link){
                    $safeLink = htmlspecialchars($link, ENT_QUOTES);
                    $text .= "<li><a href=\"".$safeLink."\">".$safeLink."</a></li>";
                }
                $text .= "</ul>";
                $items[] = $entry['count']." $people searching for <a href=\"".$from."\">".htmlspecialchars($entry['label'], ENT_QUOTES)."</a> found <a href=\"".$to."\">this</a> [<A HREF=\"javascript:toggle('".$listId."')\">+</A>]\n".$text;
                break;
            default:
                $items[] = $entry['count']." $people came from <a href=\"".$from."\">".$from."</a> to <a href=\"".$to."\">here</a>";
        }
    }
    return $items;
}
######################
/**
 * Add one result row to a referrer bucket (helper for referers()).
 *
 * Keeps a running hit count per key; link and landing page always reflect the
 * most recently seen row for that key.
 *
 * @param array  $bucket Bucket table, modified in place.
 * @param string $key    Group key (search terms, LiveJournal user, exception title).
 * @param array  $row    Query row with 'referer' and 'referers' (hit count).
 * @param string $found  URL of the page the visitors arrived at.
 */
function _referers_tally(&$bucket, $key, $row, $found){
    $previous = isset($bucket[$key]) ? $bucket[$key]['count'] : 0;
    $bucket[$key] = array(
        "count" => $previous + (int)$row['referers'],
        "link" => $row['referer'],
        "found" => $found
    );
}

/**
 * List the external referrers recorded for the current site.
 *
 * Reads the per-site access table ($site), sorts every referer into search
 * queries, LiveJournal friends pages or plain referrals, and returns the HTML
 * fragments for the category selected by $request.
 *
 * @param string $request "search", "lj" or "id"; any other value selects plain referrals.
 * @param mixed  $id      With $request == "id": a "-"-separated pair handed to
 *                        getPermalink() to restrict the query to one page.
 * @param mixed  $since   Lower bound for time_stamp (callers in this file pass a
 *                        Unix timestamp), or false for no bound.
 * @return array List of HTML strings; for "id", array(search items, referral items).
 */
function referers($request, $id=false, $since = false){
    global $site;
    global $log;
    global $wanted;
    global $_EP;

    $link = "";
    if ($request == "id"){
        $id = explode("-", $id);
        $link = getPermalink($id[0], $id[1]);
        // Strip the site prefix so the link compares against request_uri.
        $link = preg_replace("/".preg_quote($_EP['url'],"/")."/","", $link);
    }

    $query = "select count(referer) as referers, referer, request_uri from $site"
        ." where referer not like '".$_EP['url']."%'"
        ." and referer != \"-\""
        ." and referer not like '%cache:%' ";
    if ($id){
        $query .= " and request_uri like '".mysql_real_escape_string($link)."'";
    }
    if ($since){
        $query .= " and time_stamp > ".(int)$since;
    }
    $query .= " group by referer order by referers desc;";
    $r = safequery($query,true,$log);

    $search = array(
        array('name' => "Search", 'match' => "(otmpl|qry|va|word|srchKey|keyword|term|MT|K|QRY|key|searchfor|userQuery|q|p|query|search|Keywords|MetaTopic|ask)\=(cache\:.*?\+)?(.*?)(&.*|$)", 'i' => 3),
    );
    $searchExceptions = array(
        array('regex' => "^http://images\.google", 'title' => 'something though images.google'),
        array('regex' => "^http://.*\.google.*/im", 'title' => 'something though images.google'),
        array('regex' => "^http://www\.ask\.co\.uk/image", 'title' => 'something though ask.co.uk\'s Images'),
        array('regex' => "^http://webferret\.search.com/click", 'title' => 'Webferrer'),
        // Was "www.\.overture", which required an extra character after "www".
        array('regex' => "^http://www\.overture\.com", 'title' => 'Overture')
    );
    $livejournalURLs = array(
        "^http://www\.livejournal\.com/users/(.*)/friends",
        "^http://www\.livejournal\.com/~(.*)/friends",
        "^http://(.*)\.livejournal\.com/friends"
    );

    $searches = array();
    $referers = array();
    $livejournals = array();
    while($row = mysql_fetch_array($r)){
        $matched = false;
        $found = "http://".$wanted[1].$row['request_uri'];
        foreach($livejournalURLs as $pattern){
            if (preg_match("#".$pattern."#",$row['referer'], $matches)){
                // The user name is capture group 1. The old code looked the
                // running count up under a different (bogus) key, so counts
                // for the same journal never accumulated.
                _referers_tally($livejournals, urldecode($matches[1]), $row, $found);
                $matched = true;
            }
        }
        foreach($search as $engine){
            if (preg_match("#".$engine['match']."#",$row['referer'], $matches)){
                _referers_tally($searches, urldecode($matches[$engine['i']]), $row, $found);
                $matched = true;
            }
        }
        foreach($searchExceptions as $engine){
            if (preg_match("#".$engine['regex']."#",$row['referer'])){
                _referers_tally($searches, $engine['title'], $row, $found);
                $matched = true;
            }
        }
        if (!$matched){
            $referers[] = array(
                "count" => (int)$row['referers'],
                "link" => $row['referer'],
                "found" => $found
            );
        }
    }

    // Referers and search terms are supplied by remote clients, so everything
    // is HTML-escaped on output.
    $outsearch = array();
    foreach($searches as $terms => $entry){
        $people = ($entry['count'] == 1) ? "person" : "people";
        // Strip non-word characters to get a usable element id (the old
        // pattern /\w/ stripped the word characters instead).
        $listId = preg_replace("/\W/","",$terms);
        $outsearch[] = $entry['count']." $people searching for <a href=\"".htmlspecialchars($entry['link'], ENT_QUOTES)."\">".htmlspecialchars($terms, ENT_QUOTES)."</a> found <a href=\"".htmlspecialchars($entry['found'], ENT_QUOTES)."\">this</a><ul style=\"display: none;\" id=\"".$listId."\">Test</ul>";
    }

    $outrefs = array();
    foreach($referers as $entry){
        $people = ($entry['count'] == 1) ? "person" : "people";
        $from = htmlspecialchars($entry['link'], ENT_QUOTES);
        $outrefs[] = $entry['count']." $people came from <a href=\"".$from."\">".$from."</a> to <a href=\"".htmlspecialchars($entry['found'], ENT_QUOTES)."\">here</a>";
    }

    $outljs = array();
    foreach($livejournals as $lj => $entry){
        $people = ($entry['count'] == 1) ? "person" : "people";
        $outljs[] = $entry['count']." $people came from <a href=\"".htmlspecialchars($entry['link'], ENT_QUOTES)."\">".htmlspecialchars($lj, ENT_QUOTES)."'s Livejournal</a> to <a href=\"".htmlspecialchars($entry['found'], ENT_QUOTES)."\">here</a>";
    }

    if ($request == "search"){
        $items = $outsearch;
    } elseif ($request == "lj"){
        $items = $outljs;
    } elseif ($request == "id"){
        $items = array($outsearch,$outrefs);
    } else {
        $items = $outrefs;
    }
    return $items;
}
/**
 * Render a MySQL result set as an HTML table.
 *
 * Column names become the header row. A column named "time_stamp" is treated
 * as a Unix timestamp and shown as a time (for today's rows) or as date/time.
 * SQL NULLs are shown as the text "NULL". The result set is freed before
 * returning.
 *
 * @param resource $result Result handle usable with the mysql_* functions.
 * @return string HTML for the table.
 */
function queryTable($result)
{
    $numcols = mysql_num_fields($result);
    // Column index => special formatting to apply to that column.
    $formats = array();
    $out = "<table border=1>\n<tr>";
    for ($i = 0; $i < $numcols; $i++) {
        $field = mysql_field_name($result, $i);
        if ($field == "time_stamp") {
            $formats[$i] = "datetime";
        }
        $out .= "<th>" . htmlspecialchars($field, ENT_QUOTES) . "</th>\n";
    }
    $out .= "</tr>\n"; // end table header

    // table body
    $today = date("d-m-Y");
    while ($row = mysql_fetch_row($result)) {
        $out .= "<tr>\n";
        for ($i = 0; $i < $numcols; $i++) {
            $out .= "<td>";
            if (!isset($row[$i])) {
                // SQL NULL - checked first so it is never fed to date().
                $out .= "NULL";
            } elseif (isset($formats[$i]) && $formats[$i] == "datetime") {
                $stamp = (int)$row[$i];
                // "H:i.s" is hours:minutes.seconds. The previous "h:m.i"
                // printed the month where the minutes belong, and a 12-hour
                // clock without am/pm.
                if (date("d-m-Y", $stamp) == $today) {
                    $out .= date("H:i.s", $stamp);
                } else {
                    $out .= date("d-m-Y/H:i.s", $stamp);
                }
            } else {
                // Log fields (user agents, referers...) are client-supplied.
                $out .= htmlspecialchars($row[$i], ENT_QUOTES);
            }
            $out .= "</td>\n";
        }
        $out .= "</tr>\n";
    }
    $out .= "</table>";
    mysql_free_result($result);
    return $out;
}
// User-agent classification table.
// Each entry maps a display name to:
//   'count' - running tally, starts at 0
//   'match' - PCRE pattern tested against the User-Agent string
//   'group' - broader family the agent is reported under
// NOTE(review): the code that consumes this table is not visible here. Specific
// patterns are listed before generic ones ('Opera 7' before 'Opera <7', 'IE 6'
// before 'IE <5', catch-all 'Other' last), so presumably the first matching
// entry wins - confirm before reordering.
$agents = array(
    'Opera 7' => array('count' => 0, 'match' => "/^Opera\/7/", 'group' => 'Opera'),
    'Opera <7' => array('count' => 0, 'match' => "/^Opera/", 'group' => 'Opera'),
    // NOTE(review): AOL is grouped under 'Opera', which looks like a copy/paste slip - confirm the intended group.
    'AOL' => array('count' => 0, 'match' => "/AOL/", 'group' => 'Opera'),
    'IE 6' => array('count' => 0, 'match' => "/MSIE 6\.0/", 'group' => 'Internet Explorer'),
    'IE <5' => array('count' => 0, 'match' => "/MSIE/", 'group' => 'Internet Explorer'),
    'WebCrawler' => array('count' => 0, 'match' => "/^FAST-WebCrawler/", 'group' => 'Robots'),
    'Inktomi' => array('count' => 0, 'match' => "/slurp@inktomi.com/", 'group' => 'Robots'),
    'Ask Jeeves' => array('count' => 0, 'match' => "/Teoma\)$/", 'group' => 'Robots'),
    'Grub' => array('count' => 0, 'match' => "/grub-client/", 'group' => 'Robots'),
    'LARBIN' => array('count' => 0, 'match' => "/larbin/i", 'group' => 'Robots'),
    // Was "/Firebird/", a duplicate of the 'Firebird' pattern below, so real
    // Firefox user agents were never counted here.
    'Firefox' => array('count' => 0, 'match' => "/Firefox/", 'group' => 'Firefox'),
    'Phoenix' => array('count' => 0, 'match' => "/Phoenix/", 'group' => 'Firefox'),
    'Firebird' => array('count' => 0, 'match' => "/Firebird/", 'group' => 'Firefox'),
    'wget' => array('count' => 0, 'match' => "/^Wget/", 'group' => 'Wget'),
    #'Mozilla 1.4b' => array('count' => 0, 'match' => "/rv:1.4b\) Gecko/", 'group' => 'Mozilla')
    'Safari' => array('count' => 0, 'match' => "/Safari\//", 'group' => 'Safari'),
    'KHTML' => array('count' => 0, 'match' => "/KHTML/", 'group' => 'KHTML'),
    'Konqueror' => array('count' => 0, 'match' => "/Konqueror/", 'group' => 'KHTML'),
    'Netscape 4' => array('count' => 0, 'match' => "/^Mozilla\/4\./", 'group' => 'Netscape'),
    // "[1|2|3]" also accepted a literal "|"; a character class needs no separators.
    'Netscape <4' => array('count' => 0, 'match' => "/^Mozilla\/[123]\./", 'group' => 'Netscape'),
    'Mozilla' => array('count' => 0, 'match' => "/Gecko/", 'group' => 'Mozilla'),
    'Google' => array('count' => 0, 'match' => "/^Googlebot/", 'group' => 'Robots'),
    'Yahoo' => array('count' => 0, 'match' => "/Yahoo\! Slurp/", 'group' => 'Robots'),
    'Scooter' => array('count' => 0, 'match' => "/^Scooter/", 'group' => 'Robots'),
    'Archive.org' => array('count' => 0, 'match' => "/^ia_archiver$/", 'group' => 'Robots'),
    'Python' => array('count' => 0, 'match' => "/^Python-urllib/", 'group' => 'Robots'),
    'PHP' => array('count' => 0, 'match' => "/^PHP/", 'group' => 'Robots'),
    'NPBot' => array('count' => 0, 'match' => "/^NPBot/", 'group' => 'Robots'),
    'Zao' => array('count' => 0, 'match' => "/^Zao\//", 'group' => 'Robots'),
    'linkhype.com' => array('count' => 0, 'match' => "/^linkhype.com\//", 'group' => 'Robots'),
    'NutchOrg' => array('count' => 0, 'match' => "/^Nutch\//", 'group' => 'Robots'),
    'Almaden Crawler' => array('count' => 0, 'match' => "/^http:\/\/www.almaden.ibm.com\/cs\/crawler\//", 'group' => 'Robots'),
    'MSN Bot' => array('count' => 0, 'match' => "/^MSNBOT/i", 'group' => 'Robots'),
    'TranSGeniKBot' => array('count' => 0, 'match' => "/^transgenikbot/i", 'group' => 'Robots'),
    'QuepasaCreep' => array('count' => 0, 'match' => "/^QuepasaCreep/", 'group' => 'Unknown'),
    'Mail Sweeper' => array('count' => 0, 'match' => "/^Mail Sweeper/", 'group' => 'Unknown'),
    // 'Radio Userland' used to be listed twice with identical values; the
    // redundant second entry has been dropped.
    'Radio Userland' => array('count' => 0, 'match' => "/^Radio UserLand/", 'group' => 'RSS Readers'),
    'FeedOnFeeds' => array('count' => 0, 'match' => "/^FeedOnFeeds/", 'group' => 'RSS Readers'),
    'Syndic8' => array('count' => 0, 'match' => "/^Syndic8/", 'group' => 'RSS Readers'),
    'WeblogMonitor' => array('count' => 0, 'match' => "/^WeblogMonitor/", 'group' => 'RSS Readers'),
    'NNTP://RSS' => array('count' => 0, 'match' => "/^nntp\/\/rss/", 'group' => 'RSS Readers'),
    'SharpReader' => array('count' => 0, 'match' => "/^SharpReader/", 'group' => 'RSS Readers'),
    'KNewsTicker' => array('count' => 0, 'match' => "/^KNewsTicker/", 'group' => 'RSS Readers'),
    'PostNuke' => array('count' => 0, 'match' => "/^PostNuke\:/", 'group' => 'RSS Readers'),
    'rssSearch Harvester' => array('count' => 0, 'match' => "/^rssSearch Harvester/", 'group' => 'RSS Readers'),
    'netNewsWire' => array('count' => 0, 'match' => "/^NetNewsWire/", 'group' => 'RSS Readers'),
    'AmphetaDesk' => array('count' => 0, 'match' => "/^AmphetaDesk/", 'group' => 'RSS Readers'),
    'Syndirella' => array('count' => 0, 'match' => "/^Syndirella/", 'group' => 'RSS Readers'),
    'Straw' => array('count' => 0, 'match' => "/^Straw/", 'group' => 'RSS Readers'),
    // The 'group' key was an unquoted bare word in the next five entries
    // (an undefined-constant error on current PHP); it is now a proper string key.
    'LiveJournal.com Syndicator' => array('count' => 0, 'match' => "/^LiveJournal.com/", 'group' => "RSS Readers"),
    'Bloglines' => array('count' => 0, 'match' => "/^Bloglines/", 'group' => "RSS Readers"),
    'Aquaintances' => array('count' => 0, 'match' => "/^Aquaintances/", 'group' => "RSS Readers"),
    'Magpie RSS reader (PHP)' => array('count' => 0, 'match' => "/^MagpieRSS/", 'group' => "RSS Readers"),
    'Misc <acronym title="UltraLiberal Feed Parser">ULFP</acronym> user' =>
        array('count' => 0, 'match' => "#\+http\://diveintomark\.org/projects/feed_parser/#", 'group' => "RSS Readers"),
    'SiteCheck' => array('count' => 0, 'match' => "/^sitecheck.internetseer.com/", 'group' => 'Other'),
    'Other' => array('count' => 0, 'match' => "/^.*$/", 'group' => 'Other'),
);
switch ($wanted[2]){
case "search":
$out .= "<p>Search queries that led to your site</p>";
$term = false;
switch ($wanted[3]){
case "day":
$term = date("U", strtotime("1 day ago"));
break;
case "week":
$term = date("U", strtotime("1 week ago"));
break;
case "month":
$term = date("U", strtotime("1 month ago"));
break;
case "year":
$term = date("U", strtotime("1 year ago"));
break;
case "today":
$term = date("U", strtotime(date("Y-m-d")));
break;
case "thismonth":
$term = date("U", strtotime(date("Y-m-01")));
break;
case "thisyear":
$term = date("U", strtotime(date("Y-01-01")));
break;
}
$out .= "<p>Last: "
."[ <a href=\"$sitelink/search/day\">Day</a> "
."| <a href=\"$sitelink/search/week\">Week</a> "
."| <a href=\"$sitelink/search/month\">Month</a> ]<br>\n"
."[ <a href=\"$sitelink/search/today\">Today</a> "
."| <a href=\"$sitelink/search/thismonth\">This month</a> "
."| <a href=\"$sitelink/search/thisyear\">This year</a> ]\n";
$out .= "<br>\n[ <a href=\"$sitelink/livejournal/".$wanted[3]."\">Switch to Livejournals</a> | <a href=\"$sitelink/referer/".$wanted[3]."\">Switch to Referers</a> ]";
$out .= "</p>";
$items = referers("search",false,$term);
$out .= $page->buildlist($items);
$out .= print_r_to_var($referers);
break;
case "referer":
$out .= "<p>These are the pages people came to your site from</p>";
$term = false;
switch ($wanted[3]){
case "day":
$term = date("U", strtotime("1 day ago"));
break;
case "week":
$term = date("U", strtotime("1 week ago"));
break;
case "month":
$term = date("U", strtotime("1 month ago"));
break;
case "year":
$term = date("U", strtotime("1 year ago"));
break;
case "today":
$term = date("U", strtotime(date("Y-m-d")));
break;
case "thismonth":
$term = date("U", strtotime(date("Y-m-01")));
break;
case "thisyear":
$term = date("U", strtotime(date("Y-01-01")));
break;
}
$out .= "<p>Last: "
."[ <a href=\"$sitelink/referer/day\">Day</a> "
."| <a href=\"$sitelink/referer/week\">Week</a> "
."| <a href=\"$sitelink/referer/month\">Month</a> ]<br>\n"
."[ <a href=\"$sitelink/referer/today\">Today</a> "
."| <a href=\"$sitelink/referer/thismonth\">This month</a> "
."| <a href=\"$sitelink/referer/thisyear\">This year</a> ]\n";
$out .= "<br>\n[ <a href=\"$sitelink/search/".$wanted[3]."\">Switch to Searches</a> | <a href=\"$sitelink/livejournal/".$wanted[3]."\">Switch to Livejournals</a> ]";
$out .= "</p>";
$items = referers("referer", false, $term);
$out .= $page->buildlist($items);
$out .= print_r_to_var($referers);
break;
case "refforlink":
$url = parse_url ($_GET['url']);
$out .= "<p>These are the pages people came to <A HREF=\"".$url['path']."\">".$url['path']."</A> from</p>";
$term = false;
$out .= "<p>Last: "
."[ <a href=\"$sitelink/referer/day\">Day</a> "
."| <a href=\"$sitelink/referer/week\">Week</a> "
."| <a href=\"$sitelink/referer/month\">Month</a> ]<br>\n"
."[ <a href=\"$sitelink/referer/today\">Today</a> "
."| <a href=\"$sitelink/referer/thismonth\">This month</a> "
."| <a href=\"$sitelink/referer/thisyear\">This year</a> ]\n";
$out .= "</p>";
$items = refforpage(urldecode($url['path']), false, $term);
$out .= $page->buildlist($items);
#$out .= print_r_to_var($referers);
break;
case "livejournal":
$out .= "<p>These are the Livejournal friends pages people came to your site from</p>";
$term = false;
switch ($wanted[3]){
case "day":
$term = date("U", strtotime("1 day ago"));
break;
case "week":
$term = date("U", strtotime("1 week ago"));
break;
case "month":
$term = date("U", strtotime("1 month ago"));
break;
case "year":
$term = date("U", strtotime("1 year ago"));
break;
case "today":
$term = date("U", strtotime(date("Y-m-d")));
break;
case "thismonth":
$term = date("U", strtotime(date("Y-m-01")));
break;
case "thisyear":
$term = date("U", strtotime(date("Y-01-01")));
break;
}
$out .= "<p>Last: "
."[ <a href=\"$sitelink/referer/day\">Day</a> "
."| <a href=\"$sitelink/referer/week\">Week</a> "
."| <a href=\"$sitelink/referer/month\">Month</a> ]<br>\n"
."[ <a href=\"$sitelink/referer/today\">Today</a> "
."| <a href=\"$sitelink/referer/thismonth\">This month</a> "
."| <a href=\"$sitelink/referer/thisyear\">This year</a> ]\n";
$out .= "<br>\n[ <a href=\"$sitelink/search/".$wanted[3]."\">Switch to Searches</a> | <a href=\"$sitelink/referer/".$wanted[3]."\">Switch to Referers</a> ]";
$out .= "</p>";
$items = referers(