#!/usr/bin/perl
#  drbcheck.cgi - dr. jørgen mash's DNS Datadase List Checker
#  Copyright (C) 2002-2003 by dr. jørgen mash <http://moensted.dk/>
#  All rights reserved.
#
#  Build on:
#  Joe Jarrod's rbcheck-0.10-13 - <http://relays.osirusoft.com/>
#      <http://www.osirusoft.com/rbcheck/>
#
#  This is free software with ABSOLUTELY NO WARRANTY.
#  You can redistribute it and/or modify, if you do please drop
#  me a line at drbcheck @ moensted.dk
#
$ver = "1.4.3";
#  24/04/03 1.4.3  new links to SenderBase
#  04/03/03 1.4.2  new order in link to other resources, and new whois
#  08/02/03 1.4.1  Compleate redesign of lookup we now send our all queries at the beginning, and read then late on...
#  01/02/03 1.3.16 Add link to Trustic
#  23/01/03 1.3.15 Include link to form for new rbl's, do not make human readable timestamps if first digit is 0
#  15/12/02 1.3.14 Spam Links changed the URL
#  12/11/02 1.3.13 add link to -10 prev and next +10 ip address lookup.
#  12/11/02 1.3.12 browers that understand <abbr> now gets the long name when howering timeout sites
#  12/11/02 1.3.11 /000\.000\.000\.000/ is now /0+\.0+\.0+\.0+/
#  10/11/02 1.3.10 ^http$ don't need links
#  08/11/02 1.3.9  moved external links around
#  07/11/02 1.3.8  ip numbers written as 123-123-123-123 are now striped to 123.123.123.123
#  21/10/02 1.3.7  don't cache if there is nothing to lookup
#  20/10/02 1.3.6  remove ()[] from addr input
#  20/10/02 1.3.5  changes to make the cache work more usefull
#  20/10/02 1.3.4  use CGI::Cache (http://cgicache.sourceforge.net/) to speedup lookup times
#  20/10/02 1.3.3  webify <url:http://*> from GIRL TXT records
#  19/10/02 1.3.2  Add link to Spam Links
#  17/10/02 1.3.1  Add PayPal link....
#  03/10/02 1.3.0  Removed CGI lib - and only show listings with acutal IP addresses in the lookup
#  24/09/02 1.2.4  now shows timeout >= and not >
#  23/09/02 1.2.3  change , to . before we try to resolv the input
#  15/09/02 1.2.2  drbsites now has a type field - stay tuned to figure how it will be used...
#  06/09/02 1.2.1  minor changes to the email detect in webify to handel DRBL server@ns/ is not a e-mail
#  02/09/02 1.2.0  new webify sub, creates clickable content from txt records (ides from Joe's 0.12-46) njabl time stamp now readable
#  ...
#  09/06/02 1.0.0  First major update.
#   - moved the list to it's own file to make updates more easy.
#   - new field in drbsites: optional name server.
#   - removed links to local script in drbsites to save space.
#   - changed all lookups to use Net::DNS (faster?)
#   - removed JIPPG <http://www.jippg.org/> code
#   - only shows common RBL's in nomatch part of result page
#   - don't look up, if no ip/host
#

$beg = time;
&readinput;
$debug = 1 if $param{'debug'};
$nocache = 1 if $param{'nocache'};

if ($param{'addr'}) {
    $test = $param{'addr'};
    $test =~ s/\s+//g;
    $test =~ s/\(|\)|\[|\]//g;
    $test =~ s/\,/\./g; # I'm stupid and type , instead of .
    $test =~ s/\-/\./g if $test =~ /^\d+\-\d+\-\d+\-\d+$/; # I'm stupid and type , instead of .
    $test =~ s/\.$//;
    $test = "" if $test =~ /0+\.0+\.0+\.0+/;
}

if (($test) && (!$nocache)){
    use CGI::Cache;
    CGI::Cache::setup( { cache_options =>
                       { max_size => 20 * 1024 * 1024,
                         default_expires_in => 3600,
                       }
                     } );
    CGI::Cache::set_key( $test );
    warn ($ENV{REMOTE_ADDR}," cache key set: >$test<") if $debug;
    CGI::Cache::start() or exit;
}

$script = $ENV{SCRIPT_NAME};
$script =~ s/(.*)\/index.*/\1\//; # we don't want /index.cgi

$drbsitesrev = 0;

require 'drbsites.txt'; # http://www.moensted.dk/spam/drbsites.txt

print "Content-type: text/html; charset=iso-8859-1\n\n";
print qq!<html><head><title>drbcheck: dr. J&oslash;rgen Mash's DNS database list checker</title>
<link rel="stylesheet" type="text/css" href="style.css"><link REL="SHORTCUT ICON" href="favicon.ico">
</head><body>\n!;
require 'htmltop.txt' if -e "htmltop.txt"; 

print qq!\n<form method="get" action="$script" enctype="application/x-www-form-urlencoded">Enter an ip address or domain name: <input type="text" name="addr" value="$test"><input type="submit" name="Submit" value="Submit"></form>\n!;

if ($test) {
    my %sock = ();
	use Net::DNS;
	$::RESOLVER = Net::DNS::Resolver->new;
	$tcptimeout = 5;                       # the default timeout is 120 seconds (2 minutes) 
	$delay = $tcptimeout;
	$::WAIT_INCREMENT = 0.1;

	&sendquery("test", "$test");
	
    print "<hr>disclaimer: <B>I neither endorses nor opposes the use of any of the below mentioned DNSbl as filters. <br>";
    print "Please visit the policy pages of each respective link prior to making nominations or removals. ";
    print "Each list has their own criteria and procedure, and to defy their procedures simply creates friction, irritation, and urges to kill.</B><br>";
    print "note: Not all lists are intentet as black/block lists!<br>";
    print "You are ALWAYS listet on one or three lists. This do not indicate that you are a SPAMmer!<br><hr>\n";
    
	@resolved = &readquery("test");
 
    if (@resolved) {
        print "Resolved <a href=\"$script?addr=$test\">$test</a>";
        for my $resolved (@resolved) {
            print " to <a href=\"$script?addr=$resolved\">$resolved</a>";
        }
        print "<br>\n";
        $resolved = pop @resolved;
        if ($test =~ /\d+\.\d+\.\d+\.\d+/) {
            ($a,$b,$c,$d) = split (/\./,$test);
            &rblbgsend;
            $host = $resolved;
            &mx_records($resolved);
        } else {
            ($a,$b,$c,$d) = split (/\./,$resolved);
            &rblbgsend;
            $host = $test;
            &mx_records($test);
        }
    } elsif ($test =~ /\d+\.\d+\.\d+\.\d+/) {
        ($a,$b,$c,$d) = split (/\./,$test);
        &rblbgsend;
        print "[<a href=\"$script?addr=$test\">$test</a>]";
    } else {
        print "no IP found for $test - try again <br>";
        &mx_records($test);
        &footer;
        exit;
    }

    print "<hr>\n";
    $count=0; $numberoftest=0;
    warn ($ENV{REMOTE_ADDR}," looking up: $a.$b.$c.$d ($param{'addr'})") if $debug;
            
    $lasttime = $beg;
    foreach $rblsite (@drbsites) {
        $duration = (time - $lasttime);
        $lasttime = time;
        print "&lt\;-- took $duration !! $rblcode<br>" if ($debug);
        ($rblcode,$rbls,$rblw,$rbln,$rblp,$rbabout,$rbstatus,$rblremoval,$longname,$ok,$txt,$type,$rbldns) = split ('\x3b',$rblsite);
        warn ("$rblcode  ". (time - $beg) ) if $debug;
        if ($ok) {
            $test1 = join (".",$d,$c,$b,$a,$rbls);
            $ipfound = 0;
            $numberoftest++;

		    my @r1 = &readquery($rblcode);

            if (@r1[-1] =~ /\d+\.\d+\.\d+\.\d+/) {
				&sendquery($rblcode.about,$rbabout,"TXT",$rbldns) if ($rbabout);
                $count++;
                print "<b>+ ".($rblp ? "<a href=\"$rblp\">$rblcode</A>": $rblcode)." </b><i>$longname</i>: ";
                print ($rblw ? "<a href=\"http://$rblw$a.$b.$c.$d\">$rbls</A> " : "$rbls ");
                for my $r1 (@r1) {
                    print " -&gt; $r1";
                }
                print "<b>";

                if ($txt) {
			    	my @txtinfo2 = &readquery($rblcode.txt);
                    foreach my $n1 (@txtinfo2) { 
                        my @narr =  split ('\x22',$n1);
                        foreach my $narrl (@narr) {
                            print "<br>" . &webify($narrl) if ($narrl);
                        }
                    }
                }
                if ($rbabout) {
				    my @txtinfo = &readquery($rblcode.about);
                    foreach my $n1 (@txtinfo) { 
                        print "</b><br>about: <b>";
                        my @narr =  split ('\x22',$n1);
                        foreach my $narrl (@narr) {
                            print (&webify($narrl));
                        }
                    }
                }
                print "</b>";
                print "<br>[<a href=\"$rblremoval\">removal</a>]" if ($rblremoval);
                print "<br><hr>\n";            
            } else { # Not listet
                $timeoutsites .= "<abbr title=\"$longname\">" . ($rblw ? "<a href=\"http://$rblw$a.$b.$c.$d\">$rblcode</A> " : "$rblcode ") . "</abbr>" if ((time - $lasttime) >= $tcptimeout);
			    if ($ok == 2) {
                    $nomatch .= "- ".($rblp ? "<a href=\"$rblp\">$rblcode</A>": $rblcode)." <i>$longname</i>: ";
                    $nomatch .= ($rblw ? "<a href=\"http://$rblw$a.$b.$c.$d\">$rbls</A> " : "$rbls ");
                    $nomatch .= "[<a href=\"$rbln$a.$b.$c.$d\">Nominate</A>] " if ($rbln);
                    $nomatch .= " [<a href=\"http://www.foobar.tm/beta/index.cgi?domain=$rbstatus\">Check DNS</a>] " if ($rbstatus);
                    $nomatch .= "<br>\n";
                }
            }

        } else { # We can't test
            if ($txt) {
                $notesting .= ($rblp ? "? <a href=\"$rblp\">$rblcode</A>" : "? $rblcode");
                $notesting .= " <i>$longname</i>: ";
                $notesting .= ($rblw ? "<a href=\"http://$rblw$a.$b.$c.$d\">$rbls</A> (click for manual search)" : $rbls);
                $notesting .= "<br>\n";
            }
        }
    }

    print "\n[<a href=\"$script?addr=" . &nextip($test,-10) . "\">&lt;&lt;</a>|<a href=\"$script?addr=" . &nextip($test,-1) . "\">&lt;</a>]";
    print " $a.$b.$c.$d ";
    print "[<a href=\"$script?addr=" . &nextip($test,1) . "\">&gt;</a>|<a href=\"$script?addr=" . &nextip($test,10) . "\">&gt;&gt;</a>] ";
    print "was found in $count lists (of $numberoftest <A href=\"$script\">tested</A>)<br>\n";
    print "note: You are ALWAYS listet on three or more lists. This do not indicate that you are a SPAMmer, or that anyone is actualy using the list to block mail from you!<br>\n";

    print "<hr><b>Selected lists that were not tested, so you may be listed there:</B><br>$notesting" if  $notesting;

    print "<hr>[news:*abuse*: <a href=\"http://groups.google.com/groups?as_epq=%22$a.$b.$c.$d%22&as_ugroup=*abuse*&scoring=d\">$a.$b.$c.$d</A>";
    print " | <a href=\"http://groups.google.com/groups?as_epq=%22$host%22&as_ugroup=*abuse*&scoring=d\">$host</A>" if $host;
    print "] [SpamCop: <a href=\"http://spamcop.net/w3m?action=checkblock&ip=$a.$b.$c.$d\">Checkblock</a> | ";
    print "<a href=\"http://spamcop.net/w3m?action=whyorbs&ip=$a.$b.$c.$d\">why ORBS</a>] "; 
    
    print "[SenderBase: <a href=\"http://senderbase.com/search?searchBy=ipaddress&searchString=$a.$b.$c.$d\">$a.$b.$c.$d/24</a>";
    print " | <a href=\"http://senderbase.com/search?searchBy=hostname&searchString=$host\">$host</A>" if $host;
    
    print "] [<a href=\"http://www.trustic.com/ip?ip=$a.$b.$c.$d\">Trustic</a>] <br>\n";
    
    print "[whois <a href=\"http://moensted.dk/spam/whois/?domain=$a.$b.$c.$d\">$a.$b.$c.$d</A>";
    print " | <a href=\"http://moensted.dk/spam/whois/?domain=$host\">$host</A>" if $host;
    print "] [SS Macro: <a href=\"http://samspade.org/t/lookat?a=$a.$b.$c.$d\">$a.$b.$c.$d</A>";
    print " | <a href=\"http://samspade.org/t/lookat?a=$host\">$host</A>" if $host;
    print "] [Whois/NS-Delegation: <a href=\"http://openrbl.org/trace.php?i=$a.$b.$c.$d&m=.&r=.\">$a.$b.$c.$d</A>";
    print " | <a href=\"http://openrbl.org/trace.php?i=$host&m=.&r=.\">$host</A>" if $host;
    print "]<br>\n";
    
    print "[DNSbl's <a href=\"http://openrbl.org/?i=$a.$b.$c.$d\">openrbl</a> | ";
    print "<a href=\"http://samspade.org/t/rbl?a=$a.$b.$c.$d\">SamSpade</a> | ";
    print "<a href=\"http://relays.osirusoft.com/cgi-bin/rbcheck.cgi?addr=$a.$b.$c.$d\">Osirusoft</a> | ";
    print "<a href=\"http://rbls.org/?q=$a.$b.$c.$d\">Multi-RBL</a> | ";
    print "<a href=\"http://tools.fpsn.net/ipbhl/?addr=$a.$b.$c.$d\">fpsn.net</a> | ";
    print "<a href=\"http://www.dnsstuff.com/tools/ip4r.ch?ip=$a.$b.$c.$d\">DnsStuff</a> | ";
    print "<a href=\"http://www.rts.com.au/spam/antispam.php3?search=$a.$b.$c.$d\">Reynolds spam db</a>] ";

    print "<hr><b>Selected lists where $a.$b.$c.$d was not found:</b><br>$nomatch" if $nomatch;
    print "<br><I>Click <A href=\"$script\">here</A> to view the full list of DNSbl's</I>\n";
    
} else {
    print "<hr>Please note that this service is SLOW (average lookup time is 40 seconds)! It check most of the " . scalar(@drbsites) . " lists shown below. ";
    print " If you want a  fast answer, pleas use <a href=\"http://openrbl.org/\">openrbl</a>.<br>";
    print " If you want more information on the list, follow the first link in the line, or check <a href=\"http://www.declude.com/junkmail/support/ip4r.htm\">Declude</a>.<br>";
    print " If you have another source of testing you would like added, send an email <a href=\"mailto:dr\@moensted.dk\">dr. j&oslash;rgen mash</a> or use this <a href=\"http://moensted.dk/spam/newdnsbl.html\">form</a>.";
    print "<hr>\n";
    print "If you like this service please make a PayPal <a href=\"https://www.paypal.com/xclick/business=paypal%40moensted.dk&item_name=moensted.dk/spam&cn=Comment+%28optional%29\">donation</a> to keep it running.";
    print "<hr>\n";
    &listall;
}

&footer;
CGI::Cache::stop() unless $nocache;
exit;

sub listall {
    foreach $rblsite (@drbsites) {
        ($rblcode,$rbls,$rblw,$rbln,$rblp,$rbabout,$rbstatus,$rblremoval,$longname,$ok,$txt,$type,$rbldns) = split ('\x3b',$rblsite);
        print ($ok ? "+ ":"?  ");
        print ($rblp ? "<a href=\"$rblp\">$rblcode</A>" : $rblcode);
        print " <i>$longname</i>: ";
        print ($rblw ? "<a href=\"http://$rblw$ENV{REMOTE_ADDR}\">$rbls</A>" : $rbls);
        print "<br>\n";
    }
}

sub footer {
    print "\n<hr>[<a href=\"http://www.declude.com/junkmail/support/ip4r.htm\">Declude DNSB List</a> | ";
    print "<a href=\"http://www.geocities.com/spamresources/filter-dnsbl.htm\">Spam Links</a>] ";
    print "[<a href=\"http://www.sdsc.edu/~jeff/spam/cbc.html\">Compare</a>] ";
    &counter;
    print "\n<hr><a href=\"http://moensted.dk/spam/drbcheck.txt\">drbcheck</a> ver. $ver, <a href=\"http://moensted.dk/spam/drbsites.txt\">drbsites</a> ";
    print "rev. $drbsitesrev by <a href=\"http://moensted.dk\">dr. j&oslash;rgen mash</a>.<br>";
#    print "\nIf you have another source of testing you would like added, send an email <a href=\"mailto:dr\@moensted.dk\">dr. j&oslash;rgen mash</a> or use this <a href=\"http://moensted.dk/spam/newdnsbl.html\">form</a>. <hr>";
#    print "\nIf you like this service please make a PayPal <a href=\"https://www.paypal.com/xclick/business=paypal%40moensted.dk&item_name=moensted.dk/spam&cn=Comment+%28optional%29\">donation</a> to keep it running.<hr>";

    CGI::Cache::pause() unless $nocache;
    $ende = time - $beg;
    print "Lookup time: $ende sec. <!-- ";
    CGI::Cache::continue() unless $nocache;
    print "This lookup was cached within the last hour! - <a href=\"$script?addr=$test&nocache=on\">bypass cache</a>";
    CGI::Cache::pause() unless $nocache;
    print "-->";
    CGI::Cache::continue() unless $nocache;
    print "\n<hr>Timeouts, not looked up: $timeoutsites" if $timeoutsites;
    print "\n</body></html>\n\n";
}

sub mx_records {
    my $mxhost = shift;
    $mxhost =~ s/^\s+|\s+$//g;

	&sendquery("MX$mxhost",$mxhost,"MX");
    my @mx_list = &readquery("MX$mxhost");

    if (@mx_list) {
    	shift @mx_list while ($mx_list[0] !~ /.* \d*/);
		if ($mx_list[0] =~ /.* \d*/) {
			if ($#mx_list == 0) {
				print "[<a href=\"$script?addr=$mxhost\">$mxhost</a> has " . ($#mx_list + 1) ." MX record";
			} elsif ($#mx_list > 0) {    
				print "[<a href=\"$script?addr=$mxhost\">$mxhost</a> has " . ($#mx_list + 1) . " MX records";
			} else {
				print "[error resolving MX for <a href=\"$script?addr=$mxhost\">$mxhost</a>]";
				return 1;
			}
			for $mx (@mx_list) {
				($smtp_pref,$smtp_result) = split (" ",$mx);
				print " <a href=\"$script?addr=$smtp_result\">$smtp_result</a>($smtp_pref)";
			}
			print "]\n";
        }
    } else {
        print "<a href=\"$script?addr=$mxhost\">$mxhost</a> has no MX records";
        my @splitter = split (/\./,$mxhost);
        shift @splitter;
        if (scalar(@splitter) > 1) {
            my $newhost = join ('.',@splitter);
            print " -&gt; ";
            &mx_records($newhost);
        }
    }  
}

sub rblbgsend {
    foreach my $rblsite (@drbsites) {
        my ($rblcode,$rbls,$rblw,$rbln,$rblp,$rbabout,$rbstatus,$rblremoval,$longname,$ok,$txt,$type,$rbldns) = split ('\x3b',$rblsite);
        warn ("$rblcode  ". (time - $beg) ) if $debug;
        if ($ok) {
            my $test1 = join (".",$d,$c,$b,$a,$rbls);
			&sendquery($rblcode,$test1,"A",$rbldns);
			&sendquery($rblcode.txt,$test1,"TXT",$rbldns) if ($txt);
		}
	}
}

sub sendquery {
	my $id = shift;
    my $lookup = shift;
    my $type = shift;
    my @dns = @_;
	my @ns = $::RESOLVER->nameservers; # store default nameservers
	warn ("bgsend $id $lookup  $type $dns") if $debug;
    $::RESOLVER->nameservers(@dns,"127.0.0.1") if @dns; # set other if apropiate
	$sock{$id} = $::RESOLVER->bgsend($lookup,$type);

	$::RESOLVER->nameservers(@ns) # restore default nameservers
}

sub readquery {
	my $id = shift;
	my $timeout = shift; 
        my @returns = ();
	#my $delay = ($timeout?$timelout: $tcptimeout);
	#warn ("read $id");

	while (($delay > 0) and (not $::RESOLVER->bgisready($sock{$id}))) {
		select(undef, undef, undef, $::WAIT_INCREMENT);
		$delay -= $::WAIT_INCREMENT;
	}
	my $result = $::RESOLVER->errorstring;
	if ($::RESOLVER->bgisready($sock{$id})) {
#		my $result = $::RESOLVER->errorstring;
		my $look = $::RESOLVER->bgread($sock{$id});
		warn " $test Query:$socket -> $result/OK\n" if $debug;
        	if ($look) {
                    for $answer ($look->answer) {
                        my @ansr =  split ('\x22',$answer->rdatastr);
                        foreach my $ans (@ansr) {
                            push @returns, $ans if ($ans);
                        }
                    }
		}
	} else {
		warn " $test:$id  noQuery:$socket -> $result\n" if $debug;
	}
	$sock{$id} = undef;
    return @returns;
}

sub counter {
    my $counterVar;
    print "<hr>";
    $file = $ENV{SCRIPT_FILENAME};
    $file =~ s/(.*)\/.*/\1/;
    $file .= "/drbcheck.cnt";  # must be readable/writable by your webserver's user
    open (FILE, "+>>" , $file) or warn "cannot open $file for reading and appending: $!";
    flock(FILE, 2) or warn "cannot lock $file exclusively: $!";
    seek FILE, 0, 0;
    my @file_contents = <FILE>;  # we use an array even though there won't be more than a single line of data here.
    if ($file_contents[0] =~ /^(\d+)$/) {
        $counterVar = $1;  # $1 is captured by the ()'s in the regular expression
	    $counterVar++;  # auto-increment the same variable with 1
	    truncate FILE, 0;
	    print (FILE $counterVar);  
    } else {
        $counterVar = "COUNTER ERROR";  # the regular expression didn't match
    }
    
    close (FILE);
    print "This page has been accessed $counterVar times since June 8. 2002<br>";
}

sub webify {
    my $line = shift;
    my $return = "";
    my @words = split(" ",$line);
    
    foreach my $word (@words) {
        if ($word =~ /^<a$|\/a>$|\">|\'>|href/i) {
            # do nothing
        } elsif ($word =~ /^<(.*)>$/) {
            $word = "&lt\;".&webify("$1")."\&gt\;";
        } elsif ($word =~ /^https?\:\/\/.+/i) { 
            $word = "<a href=\"$word\">$word</a>";
        } elsif ($word =~ /^URL:(.*)$/i) {
            $word = "URL:".&webify("$1");
        } elsif ((($word =~ /(^[^\:]+:)([^\@]*\@[\w\.-]*)(.*)$/) || ($word =~ /^()([^\@]*\@[\w\.-]*)(.*)$/)) && ($word !~ /\@.*\/.*/)) {
            $word = "$1<a href=\"mailto:$2\">$2</a>$3";
        } elsif (($word =~ /^\d{10,}$/) && ($word !~ /^0/))  { # primary NJABL.org time stamps in human form
            my $time = localtime ($word);
            $word = "$word ($time)";
        } else {
             $word =~ s/\</\&lt\;/g;
             $word =~ s/\>/\&gt\;/g;
        }
        $return .= $word . " ";
    }
    
    chop($return); # we have one space in the end - remove that
    return $return;
}

sub readinput {
    my ($input,@fields);
    if($ENV{'REQUEST_METHOD'} eq 'POST') {
        read(stdin,$input,$ENV{'CONTENT_LENGTH'});
    } elsif ($ENV{'REQUEST_METHOD'} eq 'GET') {
        $input = $ENV{'QUERY_STRING'};
    } else {
        return;
    }
    $input =~ tr/+/ /;
    @fields=split(/\&/,$input);

    foreach my $i (@fields) {
        my ($field,$data) = split(/=/,$i);
        $field =~ s/%(..)/pack("c",hex($1))/ge;
        $data =~ s/%(..)/pack("c",hex($1))/ge;
        $param{$field} = $data;
    }
}

sub nextip {
    my ($a,$b,$c,$d) = split /\./,shift;
    my $change = shift;
    my $startaddr = (($a <<24) +($b <<16) +($c<<8) +$d); 
    my $newip = $startaddr + $change;

    $a = ($newip >>24);
		  $b = (($newip >>16) &255);
		  $c = (($newip >>8) &255);
		  $d = ($newip &255);
    return ("$a.$b.$c.$d");
}
