#!/bin/sh
#################################################################
##
##	@(#) rfclookup	(c) Sep 2015 by H. Zuleger hznet.de
##
##	Lookup a rfc in a local refer database
##
##	The script checks if the refer database is up to date
##	and downloads a new version if necessary.
##
##	Oct 2015	Option -l added
##	Nov 2015	Default value (10) added to option -l
##	Jan 2016	Print error message if url string is empty
##	Feb 2017	Use function to detect download command
##			Use $textloader instead of wget 
##	Aug 2017	option -4|-6 added
##	Apr 2018	'\t' in sed command changed to two spaces
##			for portability reasons
##	Jan 2020	Option -s: ls option --full-time replaced
##			with -l and output sent to tr command for
##			portability reasons
##	Sep 2020	Option -a added to print just the abstract
##			of the RFC. With option -h additionally the
##			headline is printed.
##			Set option -L (--location) to curl to follow
##			HTML redirects.
##			Replace "http://" requests with "https://"
##	Mar 2021	Fix the BROWSER setting mechanism
##	Apr 2021	Use quiet/silent mode for wget/curl even for
##			db downloads
##	Oct 2023	Test if search string is at least 6 chars long
##
##	Please change the dbpath and other parameters below, to
##	fulfill your needs
##
#################################################################
PATH=/bin:/usr/bin:/usr/local/bin


### local config section starts here

# directory the script changes to; the database, its index and the
# timestamp file are all addressed relative to it
dbpath="$HOME/lib/groff/refer"
# file name of the refer database
db="rfc.refdb"
# file holding the time (seconds since the epoch) of the last update check
dbtimestampfile="last_check_$db"

# Pick a html browser unless the user already exported BROWSER.
# "command -v" is used instead of "which": it is specified by POSIX and
# prints nothing on stdout if the program is missing, so BROWSER stays
# empty in that case instead of catching a "no w3m in ..." message.
test -z "$BROWSER" && BROWSER=`command -v w3m`	# preferred html browser
test -z "$BROWSER" && BROWSER=`command -v lynx`	# second best html browser
#	: ${PAGER:="less"}	# preferred pager (currently not used)

### local config end

# default address family
addrfamily=""

# path of public available refer rfc database
htmlpath="https://www.hznet.de/textproc/rfc.refdb"

# downloader_detection
#	Try to find a local installed program to get a file from a webserver.
#
#	Output:	shell code on stdout which the caller has to eval, e.g.
#			eval "`downloader_detection`"
#		It sets two variables (the caller appends the url):
#		  downloader	command line used to download the database
#		  textloader	command line writing a document to stdout
#		The assignments are printed single quoted, so $addrfamily
#		and $db are expanded when the caller evals them, not here.
#	Errors:	if neither wget nor curl is installed, a message is written
#		to stderr and the function exits with status 2.  As the
#		function usually runs inside a command substitution, that
#		exit only ends the subshell; therefore "exit 2" is printed on
#		stdout as well, so the eval in the caller stops the script.
downloader_detection()
{
	# set name of command for HTTP downloads (use one of wget or curl)
	if command -v wget >/dev/null 2>&1
	then
		echo 'downloader="wget $addrfamily --quiet --timestamping --no-directories"'
		echo 'textloader="wget $addrfamily --quiet -O-"'
	elif command -v curl >/dev/null 2>&1
	then
		echo 'downloader="curl -s $addrfamily --location --silent --remote-name --remote-time --time-cond $db"'
		echo 'textloader="curl -s $addrfamily --location --silent"'
	else
		echo "$0: wget or curl needed; Please install one of them." 1>&2
		echo 'exit 2'	# executed by the eval of the caller
		exit 2
	fi
}

# name of the script without directory part (used in messages)
prog=${0##*/}
checkinterval=604800	# 1 week (expr 3600 * 24 * 7)

# Change directory to local data store.
# The database, its index and the timestamp file are addressed by relative
# names later on, so going on after a failed cd would read and write them
# in whatever directory the user happens to be in.
if ! cd "$dbpath"
then
	echo "$prog: cannot change to database directory '$dbpath'" 1>&2
	exit 1
fi

# usage [message]
#	Print an optional error message and the synopsis of the script to
#	stderr, then terminate the script with exit status 1.
#
#	$1	optional error text; printed as "error: <text>" if not empty
#	$prog	(global) program name shown in the synopsis
#
#	printf is used for the output because the treatment of "\t" by echo
#	depends on the shell: some expand it to a tab, others print it
#	literally.
usage()
{
	test -n "$1" && printf 'error: %s\n' "$1" 1>&2
	{
		printf 'usage: %s -s \t\tsynchronize rfc database\n' "$prog"
		printf 'usage: %s -n \t\tprint newest(last) rfc number\n' "$prog"
		printf "usage: %s -l n \tprint the last 'n' rfc titles and numbers (default: 10)\n" "$prog"
		printf 'usage: %s [-b|-t [-u]] <searchstring>\n' "$prog"
		printf '\t\t\t search within the database and (optionally) start a browser\n'
		printf 'usage: %s [-a [-h]] <searchstring>\n' "$prog"
		printf '\t\t\t search within the database and print the abstract\n'
	} 1>&2
	exit 1
}

# parse_options arg...
#	Evaluate the leading options of the argument list and store the
#	result in global variables.  Parsing stops at the first argument
#	which does not start with "-".
#
#	Variables set (all are reset to their default on every call, so a
#	value inherited from the environment can't switch on a mode):
#	  urlflag	 1 if -u is given (print just the url), else 0
#	  browse	 "html" (-b), "text" (-t or -a) or ""
#	  printnewestrfc 1 if -n is given, else 0
#	  printlast	 number of db lines to print for -l (twice the number
#			 of rfcs, as a %T and a %R line is used per rfc), else 0
#	  abstract	 1 if -a is given, else 0
#	  header	 1 if -h is given, else 0
#	  addrfamily	 set to -4 or -6 if one of these options is given
#	  optcount	 number of arguments consumed; the caller has to
#			 shift them away
#
#	The count of option -l is optional (default: 10).  The following
#	argument is taken as count only if it consists of digits; anything
#	else is left in place and parsed as the next argument.
#	An unknown option or an empty argument ends the script via usage.
parse_options()
{
	urlflag=0
	browse=""
	printnewestrfc=0
	printlast=0
	abstract=0
	header=0
	optcount=$#
	while test $# -gt 0
	do
		case "$1" in
		-4|-6)	# define address family for rfc download
			addrfamily=$1
			;;
		-n)	# print newest (last) rfc in database
			printnewestrfc=1
			;;
		-l)	# print last 'n' rfc titles and numbers
			case "$2" in
			""|*[!0-9]*)	# no (valid) count given; use the default
				printlast=10
				;;
			*)	shift
				printlast=$1
				;;
			esac
			# expr treats a count like "08" as decimal number
			printlast=`expr "$printlast" "*" 2`
			;;
		-s)	# sync only
			checkinterval=0
			;;
		-t)	# get the text version of the rfc
			browse="text"
			;;
		-b)	# start browser
			browse="html"
			;;
		-a)	# print rfc abstract
			abstract=1
			browse="text"
			;;
		-h)	# print additionally to the abstract the header of the rfc
			header=1
			;;
		-u)	# print just the url of the rfc
			urlflag=1
			;;
		-*)	usage
			;;
		"")	usage
			;;
		*)	break
			;;
		esac
		shift
	done
	# arguments at function entry minus arguments left over
	optcount=$((optcount - $#))
}

parse_options "$@"
shift "$optcount"

# read_timestamp file
#	Print the time (seconds since the epoch) stored in the first line of
#	<file>.  Print 0 if the file does not exist, is empty or holds
#	anything but a plain number, so the caller can always calculate with
#	the result.
read_timestamp()
{
	_ts=""
	test -f "$1" && read -r _ts < "$1"
	case "$_ts" in
	""|*[!0-9]*)	echo 0 ;;
	*)		echo "$_ts" ;;
	esac
}

# get time of last db update test
lastchecktime=`read_timestamp "$dbtimestampfile"`

# take a look at your watch (seconds since the epoch)
currenttime=`date +"%s"`
timediff=`expr "$currenttime" - "$lastchecktime"`

# check if db is changed in between
if test "$timediff" -gt "$checkinterval"
then
	downloader=""
	eval "`downloader_detection`"
	if test -z "$downloader"
	then
		# without this check the url itself would be run as command
		echo "$prog: no download command available" 1>&2
		exit 2
	fi

	# try to update db and create index
	# ($downloader holds a command line and is split into words on purpose)
	syncstatus=0
	if $downloader "$htmlpath"
	then
		# remember the check only if it succeeded; otherwise the
		# download is tried again on the next run
		echo "$currenttime" > "$dbtimestampfile"
	else
		echo "$prog: download of $htmlpath failed" 1>&2
		syncstatus=1
	fi

	if test "$db" -nt "${db}.i"
	then
		indxbib "$db"
	fi

	test "$checkinterval" -eq 0 && exit $syncstatus	# sync mode ?
fi

# normalize_urls mode
#	Filter: reads one url per line from stdin and writes the adjusted
#	urls to stdout.  "http://" is replaced by "https://" in every url.
#	If <mode> is "text", each url is additionally changed to the text
#	version of the document ("/html/" becomes "/rfc/" and ".txt" is
#	appended).  Working line by line keeps this correct if the search
#	matches more than one record.
normalize_urls()
{
	if test "$1" = "text"
	then
		sed -e 's|http://|https://|' -e 's|/html/|/rfc/|' -e 's|$|.txt|'
	else
		sed -e 's|http://|https://|'
	fi
}

# Run the requested action:
#   -n		print number and date of the last rfc in the db and the
#		modification time of the db file
#   -l		print number and title of the last rfcs
#   otherwise	look up the search string; depending on the options print
#		the url(s), start the browser, print the text or the abstract
#		of the rfc, or just print the matching db records
if test "$printnewestrfc" -eq 1
then
	grep -e "^%R" -e "^%D"  "$db" | tail -n 2 | sed -e "s/^%. //" | tr "\012" " "
	echo
	ls -l "$db" | tr -s " " " " | cut -d" " -f6-
elif test "$printlast" -gt 0
then
	grep -e "^%T" -e "^%R" "$db" |
		tail -n "$printlast" |
		sed -n  -e '/^%T /{ h; n; G; s/^%R //; s/\n%T /  /; p; }'
elif test $# -gt 0
then
	# wc counts the trailing newline too, so 7 means 6 chars
	searchlen=`printf '%s\n' "$*" | wc -c`
	test $searchlen -lt 7 && usage "search string too short (must be at least 6 chars)"
	if test "$urlflag" = 1 || test -n "$browse"
	then
		# one url per line, taken from the %O field of the records found
		url=`lkbib -p"$db" "$@" | sed -n "s/^%O //p" | normalize_urls "$browse"`
		if test -z "$url"
		then
			usage "couldn't find search string in rfc database"
		fi

		# $url is left unquoted below on purpose: several urls are
		# passed as separate arguments (or printed on one line)
		if test "$urlflag" = "1"
		then
			echo $url
		elif test "$browse" = "html"
		then
			if test -z "$BROWSER"
			then
				# otherwise the url would be run as command
				echo "$prog: no html browser found; please set BROWSER" 1>&2
				exit 1
			fi
			$BROWSER $url
		elif test "$browse" = "text"
		then
			test -z "$textloader" && eval "`downloader_detection`"
			if test -z "$textloader"
			then
				echo "$prog: no download command available" 1>&2
				exit 2
			fi
			# echo "$textloader $url" 1>&2
			if test "$abstract" = 1
			then
				# print from the "Abstract" line up to the next line
				# starting with a letter or digit; with -h the part
				# in front of the abstract is printed too
				$textloader $url |
				if test "$header" = 1
				then
					sed -n -e '1,/^A[Bb][Ss][Tt][Rr][Aa][Cc][Tt]/p' \
						 -e '/^A[Bb][Ss][Tt][Rr][Aa][Cc][Tt]/,/^[0-9A-Za-z]/p'
				else
					sed -n -e '/^A[Bb][Ss][Tt][Rr][Aa][Cc][Tt]/,/^[0-9A-Za-z]/p'
				fi |
				uniq | sed '$d'	# squeeze duplicate lines and remove the last one
			else
				$textloader $url
			fi
		fi
		exit 0
	else
		lkbib -p"$db" "$@"
	fi
else
	usage "missing searchstring"
fi
