#!/usr/bin/perl

# enables 'strict' and 'warnings'
use v5.36;

use Momjian_us;

###

# Alternate provider:   https://blog.cloudflare.com/introducing-1-1-1-1-for-families/

# Check DNS lookups against adult DNS checklist
# Cannot use tcpdump because the reverse IP often does not match
# the forward IP and the filter then does not work

# fix by software update, 2016-02-19
# Avast:  turn off 1000 site DNS scan by disabling Secure DNS (DNS Hijacking check)
#  Settings / Active Protection / Web Shield, Customize / Do not scan trusted sites
#  Settings / Tools / Home Network Security
# https://support.opendns.com/entries/57943894-Avast-2015-Security-Suite-Secure-DNS-and-OpenDNS
# https://support.opendns.com/entries/59545000-Heads-up-Excessive-DNS-queries-by-AVAST-2015
# https://forum.avast.com/index.php?topic=163825.0
# https://forum.avast.com/index.php?topic=173791.0

### Packages

use File::Basename;
use Getopt::Std;
use Readonly;
use Sys::Hostname;
use Socket;
use Fcntl qw(:flock :seek);
use Net::DNS;

Readonly my $MY_HOST => quotemeta(hostname);

my $usage = sprintf(
	"Usage:  %s [-d] [-e] [-r]\n\t-d disable\n\t-e enable\n\t-r remove last line\n",
	basename($0));


# uses opendns.com lookups
# https://www.opendns.com/dashboard/settings/1796077/content_filtering
# IP addresses
# https://support.opendns.com/entries/69715954-What-are-the-OpenDNS-Block-Page-IP-Addresses-
Readonly my @NAME_SERVERS => qw(208.67.222.222 208.67.220.220);
Readonly my $BLOCK_IP => '146.112.61.106';
Readonly my $FOUND_FILENAME => '/u/safetycheck/found';
Readonly my $DISABLE_FILENAME => '/u/safetycheck/disable';


sub skip_hosts    #($)
{
	return shift =~ m{(
		\.in-addr\.arpa |	# can't lookup IP addresses
		\.opendns\.com |	# our own adult lookups
		\.spamhaus\.org |	# spam lookups
		candle\.pha\.pa\.us |	# old hostname
		\.home |		# .home domain
		\b$MY_HOST\b
	)$}ix;
}


# Make regexp of previously accepted web sites
# We use regexp because we want to match the lower part of the hostname
sub load_previously_found_hosts    #()
{
	my @hosts;

	open(my $found, '<', $FOUND_FILENAME) or
	  sysdie("cannot open $FOUND_FILENAME");
	while (<$found>)
	{
		chomp;

		next if (m/^\s*$/);
		push @hosts, quotemeta(reverse($1)) if (m/^.*?:\s+(\S+)\s*$/);
	}
	close($found) or sysdie("cannot close $FOUND_FILENAME");

	# return regex
	return join('|', @hosts);
}


sub create_email_hosts    #()
{
	my %mail_hosts;

	# Email
	# The DNS request might have been for email, so eliminate them.
	# We might have rotated the logs so grab from the previous log as well.
	# Grab only 2.5k lines, for performance.

	my $filename =
	  '(cat /var/log/exim4/mainlog.1; [ -e /var/log/exim4/mainlog ] && cat /var/log/exim4/mainlog) | tail -2500';
	open(my $maillog, '-|', $filename) or
	  sysdie("cannot open pipe $filename");
	while (my $line = <$maillog>)
	{
		chomp $line;
		$line = lc $line;

		# check only the first host name
		next if ($line !~ m/^[^@]*@([^:> ]+)/);
		my $host = $1;
		next if (skip_hosts($host));

		# remove prefix to reduce chance of more entries for this host
		$host =~ s/^www\d*\.//i;
		$host =~ s/^mx\d*\.//i;
		$host =~ s/^mail\d*\.//i;
		$mail_hosts{$host} = 1;
	}
	close($maillog) or sysdie("cannot close pipe $filename");
	return \%mail_hosts;
}


sub create_dns_hosts    #()
{
	my %dns_hosts;

	# Get DNS lookups
	my $scan_filename = '/var/log/safety/safety.log';
	open(my $scan_dns, '<', $scan_filename) or
	  sysdie("cannot open $scan_filename");
	while (<$scan_dns>)
	{
		chomp;
		my @fields = split(m/[ #]/);

		my $source = lc $fields[5];
		my $check = lc $fields[11];

		next if ($fields[9] eq 'external:');
		next if ($fields[10] =~ m/^(CH|internal)$/i);
		# self-check lookups sometimes have no dots
		next if ($source eq $check);
		next if ($check !~ m/\./);

		# error on format changes
		if ($fields[2] ne 'queries:' ||
			$fields[3] ne 'client' ||
			$fields[8] ne 'view' ||
			$fields[9] ne 'internal:' ||
			$fields[10] ne 'query:' ||
			# IPv4 or IPv6
			(   $source !~ m/^\d+\.\d+\.\d+\.\d+$/ &&
				$source !~ m/^[\da-fA-F]*:[\da-fA-F:]+$/i))
		{
			say(STDERR "Invalid format in log file:  $_");
			# exit loop so we don't print too many failures, and so we still do the cleanup
			last;
		}

		# remove prefix to reduce chance of more entries for this host
		$check =~ s/^www\d*\.//i;
		$check =~ s/^mx\d*\.//i;
		$check =~ s/^mail\d*\.//i;

		next if (skip_hosts($check));

		my @octets = split(m/\./, $source);

		# discard guest and DHCP hosts for privacy
		next
		  if ($source =~ m/^172\.20\.1\./ &&
			$octets[2] < 16);

		# request came in from outside our network
		next if ($source !~ m/^172\.20\.1\./);

		# skip server lookups
		next if ($source =~ m/^$MY_HOST$/i);

		# if multiple hosts lookup the same ip, we remember only the last one
		$dns_hosts{$check} = $source;
	}

	close($scan_dns) or sysdie("cannot close $scan_filename");

	# Truncate file
	open($scan_dns, '>', $scan_filename) or
	  sysdie("cannot create $scan_filename");
	close($scan_dns) or sysdie("cannot close $scan_filename");

	return \%dns_hosts;
}


# Add host found on adult list to permanent safe list
sub append_found_host    #($$)
{
	my ($host, $source_host) = @_;

	open(my $safe, '>>', $FOUND_FILENAME) or
	  sysdie("cannot open $FOUND_FILENAME");

	my $location = `location "$source_host"`;
	say({$safe} localtime() . " on $location ($source_host): $host");
	close($safe) or sysdie("cannot close $FOUND_FILENAME");
	return;
}


sub perform_checks    #($$$)
{
	my ($previously_found_hosts, $email_hosts, $dns_hosts) = @_;
	my $last_addition = '';

	my $dns = Net::DNS::Resolver->new(
		nameservers => [@NAME_SERVERS],
		recurse => 1,
		debug => 0);

	# Sort by having shorter URLs appear first, to prevent duplicate additions
	for my $key (sort { (reverse $a) cmp(reverse $b) } keys %$dns_hosts)
	{
		# Don't check/add it a shorter URL was just added
		next if ($last_addition && $key =~ m/\.$last_addition$/i);

		# exclude previously found hosts
		# use a regex because it might be key might be longer than the found host
		# reverse to optimize check
		next
		  if ($previously_found_hosts ne '' &&
			reverse($key) =~ m/^(?:$previously_found_hosts)\b/i);

		# exclude email hosts
		next if (defined($email_hosts->{$key}));

		my $source_host =
		  gethostbyaddr(inet_aton($dns_hosts->{$key}), AF_INET);

		if (my $dns_query = $dns->query($key, 'A'))
		{
			foreach my $dns_result ($dns_query->answer)
			{
				if ($dns_result->type eq 'A' &&
					$dns_result->address eq $BLOCK_IP)
				{
					chomp(my $location = `location "$source_host"`);
					print(
						"$key access from $location ($source_host) found on unsafe list.\n"
					);
					append_found_host($key, $source_host);
					$last_addition = quotemeta($key);
					last;
				}
			}
		}
	}
	return;
}


sub HELP_MESSAGE    #()
{
	print $usage;
	exit(0);
}


# ---- main ----

my %argv_opts;

if (!getopts('dehr', \%argv_opts) || $argv_opts{h})
{
	print $usage;
	exit(1);
}
die("Invalid argument \"@ARGV\"\n$usage") if (@ARGV);

# disable
if ($argv_opts{d})
{
	open(my $disable, '>>', $DISABLE_FILENAME) or
	  sysdie("cannot open $DISABLE_FILENAME");
	close($disable) || sysdie("cannot close $DISABLE_FILENAME");
	exit(0);
}

# remove last line in found file
if ($argv_opts{r})
{
	# optional count for -r
	for my $linecount (1 .. (scalar @ARGV > 0 ? $ARGV[0] : 1))
	{
		open(my $found, '+<', $FOUND_FILENAME) or
		  sysdie("cannot open $FOUND_FILENAME");
		flock($found, LOCK_EX) or sysdie('Locking failed');

		my @lines = <$found>;
		seek($found, 0, SEEK_SET);

		for my $line (0 .. $#lines - 1)
		{
			print {$found} $lines[$line];
		}

		# truncate to new length
		truncate($found, tell($found));

		flock($found, LOCK_UN);

		close($found) or sysdie("cannot close $FOUND_FILENAME");

		# output removed line
		print STDOUT $lines[-1];
	}
	exit(0);
}

# allow only one instance of this script to run
open(my $app_lock, '<', "$0") or sysdie('Open failed');
flock($app_lock, LOCK_EX) or sysdie('Locking failed');

my $previously_found_hosts = load_previously_found_hosts;
my $email_hosts = create_email_hosts;
my $dns_hosts = create_dns_hosts;

# unlink if file exists or is older than 8 hours
unlink $DISABLE_FILENAME
  if (-e $DISABLE_FILENAME &&
	(-M $DISABLE_FILENAME) >= 8 / 24);

perform_checks($previously_found_hosts, $email_hosts, $dns_hosts)
  if (!-f $DISABLE_FILENAME);

flock($app_lock, LOCK_UN);
close($app_lock) or sysdie('Locking failed');

# enable, run at end so we clear cache first
unlink $DISABLE_FILENAME if ($argv_opts{e});
