#!/usr/local/bin/php -q
<?php

/*

catch0rd - an IRC url catching and mirroring daemon
===================================================

Copyright (C) 2004 Vincent Negrier aka. sIX <six@aegis-corp.org>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

*/

// Anything here is - and should be - customized

//define("T0XIRC_PATH", "http://t0xirc.si.kz/current");
// Directory (or base URL) that contains t0xirc.php.
define("T0XIRC_PATH", "inc");

// You may use the above for testing purposes, but you should install the
// t0xirc lib somewhere locally for anything else

// Connection settings handed to the bot via set_host/set_port/set_login/set_pass.
define("BOT_HOST", "192.168.109.1");
define("BOT_PORT", 3333);
define("BOT_LOGIN", "XXXXXXXXXX");
define("BOT_PASS", "XXXXXXXXXX");

// Caught files are stored under DOWNLOAD_DIR/<year>-<month>/; DESC_FILE is the
// per-directory description log. Links that start with LOCAL_URL are ignored.
define("DOWNLOAD_DIR", "/var/www/t0xico.si.kz/catch0r");
define("DESC_FILE", ".desc");
define("LOCAL_URL", "http://t0xico.si.kz");

// Largest Content-Length (in bytes) that will be downloaded.
define("MAX_LENGTH", 2097152);

// Maximum number of chained HTTP redirects followed for one link.
define("MAX_NESTED_REDIR", 10);

// Content-Type prefixes that are mirrored; a type matches when it starts
// with one of these strings.
$match_types = array(
    "application/ogg",
    "application/pdf",
    "application/rtf",
    "application/x-shockwave-flash",
    "audio/",
    "image/",
    "video/",
);

// Don't touch anything below unless you know what you are doing !

define("VERSION", "1.3.8a");
define("UA_TEXT", "catch0rd/".VERSION." (http://catch0r.si.kz)");

// Notices and warnings are silenced on purpose: the code relies on reading
// possibly-missing array keys (e.g. parse_url() results).
error_reporting(E_ALL & ~E_NOTICE & ~E_WARNING);

// Load t0xirc.php from the configured T0XIRC_PATH location.
require(T0XIRC_PATH . "/t0xirc.php");

/**
 * Print a log line prefixed with the current time (format "Ymd:His") and
 * flush the output immediately.
 *
 * @param string $s Text to log.
 */
function techo($s) {
    $stamp = date("Ymd:His");

    echo "{$stamp} {$s}\n";
    flush();
}

/**
 * Send an HTTP/1.0 HEAD request and return the raw response (status line
 * and headers).
 *
 * @param string $host  Server to connect to.
 * @param mixed  $port  TCP port; any falsy value selects port 80.
 * @param string $url   Request path (with optional query string). Leading
 *                      slashes are normalised to exactly one.
 * @param mixed  $login Optional user name for HTTP Basic authentication.
 * @param mixed  $pass  Optional password for HTTP Basic authentication.
 *
 * @return string|false Raw response text (at most 8192 bytes, possibly empty
 *                      if the server sent nothing before the timeout), or
 *                      false when the connection could not be opened.
 */
function http_get_head($host, $port, $url, $login = false, $pass = false) {

    $url = "/".ltrim($url, "/");

    if (!$port) $port = 80;

    $f = @fsockopen($host, $port, $errno, $errstr, 10);

    if (!$f) {

        return false;

    }

    $hq  = "HEAD $url HTTP/1.0\r\n";
    $hq .= "Host: $host\r\n";
    $hq .= "Connection: close\r\n";
    $hq .= "User-Agent: ".UA_TEXT."\r\n";

    if ($login || $pass) {

        $hq .= "Authorization: Basic ".base64_encode($login.":".$pass)."\r\n";

    }

    $hq .= "\r\n";

    fwrite($f, $hq);
    stream_set_timeout($f, 10);

    // A single fread() on a socket returns as soon as any data is available,
    // which may be only part of the headers. Keep reading until the blank
    // line that ends the headers, EOF/timeout, or the 8192 byte cap.
    $ret = "";

    while (!feof($f) && strlen($ret) < 8192) {

        $chunk = fread($f, 8192 - strlen($ret));

        if ($chunk === false || $chunk === "") break;

        $ret .= $chunk;

        if (strpos($ret, "\r\n\r\n") !== false || strpos($ret, "\n\n") !== false) break;

    }

    fclose($f);

    return $ret;

}

/**
 * Download a resource with an HTTP/1.0 GET request and return its body.
 *
 * The body is only returned when its size equals the Content-Length header
 * sent by the server; a missing header counts as length 0.
 *
 * @param string $host  Server to connect to.
 * @param mixed  $port  TCP port; any falsy value selects port 80.
 * @param string $url   Request path (with optional query string). Leading
 *                      slashes are normalised to exactly one.
 * @param mixed  $login Optional user name for HTTP Basic authentication.
 * @param mixed  $pass  Optional password for HTTP Basic authentication.
 *
 * @return string|false Response body, or false on connection failure, read
 *                      failure, oversize body or Content-Length mismatch.
 */
function http_get_file($host, $port, $url, $login = false, $pass = false) {

    $url = "/".ltrim($url, "/");

    if (!$port) $port = 80;

    $f = @fsockopen($host, $port, $errno, $errstr, 10);

    if (!$f) {

        return false;

    }

    $hq  = "GET $url HTTP/1.0\r\n";
    $hq .= "Host: $host\r\n";
    $hq .= "Connection: close\r\n";
    $hq .= "User-Agent: ".UA_TEXT."\r\n";

    if ($login || $pass) {

        $hq .= "Authorization: Basic ".base64_encode($login.":".$pass)."\r\n";

    }

    $hq .= "\r\n";

    fwrite($f, $hq);
    stream_set_timeout($f, 15);

    // $hdrs must be an array: "$str[] = ..." on a string is a fatal error
    // on PHP >= 7.1.
    $hdrs = array();
    $data = "";
    $hdrs_done = false;

    // Never buffer more than the configured maximum, whatever the remote
    // server decides to send (0 = no cap when the constant is not defined).
    $max_len = defined("MAX_LENGTH") ? (int) MAX_LENGTH : 0;

    while (!feof($f)) {

        if (!$hdrs_done) {

            $s = fgets($f, 8192);

            if ($s === false) {

                fclose($f);
                techo("H timeout while reading headers");

                return false;

            }

            if (trim($s) === "") {

                // Blank line: end of headers, the body follows
                $hdrs_done = true;

            } else {

                $hdrs[] = $s;

            }

        } else {

            $s = fread($f, 8192);

            if ($s === false) {

                fclose($f);
                techo("H timeout while reading data");

                return false;

            }

            $data .= $s;

            if ($max_len && strlen($data) > $max_len) {

                fclose($f);
                techo("H retrieved content exceeds maximum length");

                return false;

            }

        }

    }

    fclose($f);

    // Find the announced body size (header names are case-insensitive)
    $clen = 0;

    foreach ($hdrs as $hdr) {

        $parts = explode(":", $hdr, 2);

        if (count($parts) === 2 && strtoupper(trim($parts[0])) === "CONTENT-LENGTH") {

            $clen = (int) trim($parts[1]);
            break;

        }

    }

    if (strlen($data) !== $clen) {

        techo("H content length header does not match retrieved content");

        return false;

    }

    return $data;

}

/**
 * Bot that looks for http:// links in channel text and mirrors the linked
 * file into DOWNLOAD_DIR/<year>-<month>/ when its content type is listed in
 * $match_types and its size does not exceed MAX_LENGTH.
 */
class catcher_bot extends t0xirc_bot {

    // Presumably invoked by t0xirc_bot for public channel messages - confirm
    // against the library.
    function on_pubmsg($nick, $msg) {

        $this->pubtxt_handler($nick, $msg);

    }

    // Presumably invoked by t0xirc_bot for public channel actions - confirm
    // against the library.
    function on_pubact($nick, $msg) {

        $this->pubtxt_handler($nick, $msg);

    }

    /**
     * Make sure the download directory exists, creating it when needed.
     *
     * @param string $dl_dir Directory path.
     *
     * @return bool True when the directory exists or was created.
     */
    function ensure_dl_dir($dl_dir) {

        if (is_dir($dl_dir)) return true;

        if (!mkdir($dl_dir)) {

            techo("! unable to create download dir $dl_dir !");

            return false;

        }

        techo("! new download dir $dl_dir created");

        return true;

    }

    /**
     * Inspect one line of channel text; if it contains an http:// link,
     * resolve redirects, validate type and size, then download and record it.
     *
     * @param string $nick Nick of the person who wrote the text.
     * @param string $msg  The text itself.
     */
    function pubtxt_handler($nick, $msg) {

        // Only the first http:// link of the text is handled
        if (!preg_match("%(http://[^ ]+)%", $msg, $match_arr)) return;

        $match = $match_arr[0];

        if (strpos(strtolower($match), strtolower(LOCAL_URL)) === 0) {

            techo("X local url '$match'");

            return;

        }

        // File name = last path component without the query string.
        // Compare against false explicitly: a "?" at position 0 is falsy.
        $match_fn = basename($match);
        $qpos = strpos($match_fn, "?");
        if ($qpos !== false) $match_fn = substr($match_fn, 0, $qpos);

        $dl_dir = DOWNLOAD_DIR.DIRECTORY_SEPARATOR.date("Y-m");

        if (!$url_arr = parse_url($match)) {

            techo("X unable to parse url '$match'");

            return;

        }

        // parse_url() only returns the components present in the URL; fill
        // in the others so they can be read unconditionally below.
        $url_defaults = array("host" => null, "port" => null, "path" => null,
                              "query" => null, "user" => null, "pass" => null);
        $url_arr += $url_defaults;

        $final_url = false;
        $redir_level = 0;

        while (!$final_url) {

            $url = $url_arr["path"].($url_arr["query"] ? ("?".$url_arr["query"]) : "");

            $lhd = http_get_head($url_arr["host"], $url_arr["port"], $url, $url_arr["user"], $url_arr["pass"]);

            if (!preg_match("%HTTP/[0-9]\.[0-9] ([0-9]{3})%", strtoupper((string) $lhd), $stat_arr)) {

                // No usable answer from the server

                $this->say("$nick: ton lien est foireux");

                techo("X bad link '$match'");

                return;

            }

            $status_code = $stat_arr[1];

            switch ($status_code) {

                case 200:

                    // HTTP 200 OK

                    $final_url = true;

                    break;

                case 300:
                case 301:
                case 302:
                case 303:
                case 307:

                    // HTTP 30x Moved

                    if (!preg_match("/(URI|LOCATION): (.+)/i", $lhd, $redir_arr)) {

                        // Redirect status without a target

                        techo("X redirect status $status_code without location/uri for '$match'");
                        return;

                    }

                    if (++$redir_level > MAX_NESTED_REDIR) {

                        // Too many nested redirects (loop?)

                        techo("X maximum nested redirect level reached (".MAX_NESTED_REDIR.")");
                        return;

                    }

                    $redir_loc = trim(strtok($redir_arr[2], "\n"));

                    if (strpos(strtolower($redir_loc), "http://") === 0) {

                        // Absolute url

                        $url_arr = parse_url($redir_loc);

                        if (!$url_arr) {

                            techo("X unable to parse url '$redir_loc'");
                            return;

                        }

                        $url_arr += $url_defaults;

                    } else {

                        // Relative path: same host, new path and query

                        $url_arr["path"] = strtok($redir_loc, "?");
                        $url_arr["query"] = strtok("");

                    }

                    $match_fn = basename($url_arr["path"]);

                    techo("H redir '$match' => '$redir_loc'");

                    break;

                default:

                    // Any other HTTP status

                    techo("X unkown server response for '$match'");

                    return;

            }

        }

        if (!$match_fn) $match_fn = "noname_".time();

        // NOTE(review): $match_fn keeps whatever extension the remote URL
        // had, and DOWNLOAD_DIR looks web-served. If the web server executes
        // scripts there, a ".php" file sent with an allowed Content-Type
        // could be run - consider restricting extensions. Left unchanged.

        if (file_exists($dl_dir.DIRECTORY_SEPARATOR.$match_fn)) {

            techo("X duplicate file '$match'");

            return;

        }

        if (!preg_match("/CONTENT-TYPE: (.+)/", strtoupper($lhd), $type_arr)) {

            // No content type given

            techo("X no content type from server '$match'");

            return;

        }

        $ctype = strtok(strtolower(trim($type_arr[1])), "\n");

        if (!$ctype) {

            techo("X no content type");

            return;

        }

        // Accepted when the type starts with one of the configured prefixes
        $type_ok = false;

        foreach ($GLOBALS["match_types"] as $mtype) {

            if (strpos($ctype, $mtype) === 0) {

                $type_ok = true;
                break;

            }

        }

        if (!$type_ok) {

            // Not mirrored: just record the link in urls.html. The month
            // directory may not exist yet, so create it first. Nick and URL
            // come from IRC and are escaped before going into HTML (the
            // href is single-quoted, hence ENT_QUOTES).
            if ($this->ensure_dl_dir($dl_dir)) {

                $html_nick = htmlspecialchars($nick, ENT_QUOTES);
                $html_url = htmlspecialchars($match, ENT_QUOTES);

                file_put_contents($dl_dir . DIRECTORY_SEPARATOR . "urls.html", date("Ymd:His") . " &lt;{$html_nick}&gt; <a href='{$html_url}'>{$html_url}</a><br />\n", FILE_APPEND);

            }

            techo("X content type of '$match' is $ctype");

            return;

        }

        if (!preg_match("/CONTENT-LENGTH: ([0-9]+)/", strtoupper($lhd), $length_arr)) {

            // No size given

            techo("X no content length from server '$match'");

            return;

        }

        $length = (int)$length_arr[1];

        if ((!$length) || ($length > MAX_LENGTH)) {

            techo("X content length of '$match' is ".number_format($length)." bytes");

            return;

        }

        techo("C $match (by $nick / ".number_format($length)." bytes)");

        if (!$this->ensure_dl_dir($dl_dir)) return;

        // $url still holds the path/query of the final (post-redirect) request
        $data = http_get_file($url_arr["host"], $url_arr["port"], $url, $url_arr["user"], $url_arr["pass"]);

        if ($data === false || $data === "") {

            techo("X transfer failed for '$match'");
            return;

        }

        $dl_fn = $dl_dir.DIRECTORY_SEPARATOR.$match_fn;

        if ($f = @fopen($dl_fn, "w")) {

            fwrite($f, $data);
            fclose($f);

        } else {

            techo("? unable to open file '$dl_fn' for writing");

        }

        // One line per caught file: name, who posted it, original text
        if ($f = @fopen($dl_dir.DIRECTORY_SEPARATOR.DESC_FILE, "a")) {

            fwrite($f, $match_fn." &lt;".$nick."&gt; ".htmlentities($msg)."\n");
            fclose($f);

        } else {

            techo("? unable to open desc file for appending");

        }

    }

}

// Startup banner
techo("catch0rd/".VERSION." (c) 2004 by sIX / aEGiS <six@t0x.net>");

// Configure the bot from the constants defined at the top of the file
$bot = new catcher_bot();

$bot->set_host(BOT_HOST);
$bot->set_port(BOT_PORT);
$bot->set_login(BOT_LOGIN);
$bot->set_pass(BOT_PASS);

// Give up with a non-zero exit status when the connection cannot be made
if (!$bot->connect()) {

    techo("unable to connect to eggdrop");
    exit(-1);

}

techo("connected to eggdrop, watching channel #".$bot->channel["name"]);

$bot->run();

?>