#!/bin/sh

# Stephen Cope; 2005-10-10
# Department of Statistics, The University of Auckland
# http://www.stat.auckland.ac.nz/~kimihia/maori-keyboard

# Usage: htmltoutf8 file.html
#
# Rewrites file.html in place, replacing the HTML numeric character
# references for the macronised vowels (&#256; &#257; &#274; &#275; &#298;
# &#299; &#332; &#333; &#362; &#363;) with the corresponding UTF-8 byte
# sequences.  A copy of the unconverted document is kept as "file.html~".
# Exits 1 on a usage error or if any step of the conversion fails.

if [ ! -f "$1" ] ; then
	printf '%s file.html\n' "$0" >&2
	echo "Convert HTML entities for macrons to UTF-8 characters" >&2
	exit 1
fi

# Create the scratch file only once the argument is known to be valid, so
# the usage path leaves nothing behind, and remove it on every exit path.
TMPFILE=$(mktemp "${TMPDIR:-/tmp}/htmltoutf8.XXXXXXXXX") || exit 1
trap 'rm -f -- "$TMPFILE"' EXIT

# printf with octal escapes is used to emit the UTF-8 bytes because
# "echo -e" and "\xHH" escapes are not portable to every /bin/sh.
if ! sed \
	-e "s/&#256;/$(printf '\304\200')/g" \
	-e "s/&#257;/$(printf '\304\201')/g" \
	-e "s/&#274;/$(printf '\304\222')/g" \
	-e "s/&#275;/$(printf '\304\223')/g" \
	-e "s/&#298;/$(printf '\304\252')/g" \
	-e "s/&#299;/$(printf '\304\253')/g" \
	-e "s/&#332;/$(printf '\305\214')/g" \
	-e "s/&#333;/$(printf '\305\215')/g" \
	-e "s/&#362;/$(printf '\305\252')/g" \
	-e "s/&#363;/$(printf '\305\253')/g" \
	< "$1" > "$TMPFILE"
then
	echo "$0: conversion of $1 failed; file left untouched" >&2
	exit 1
fi

# Keep a backup, then overwrite the document's contents rather than moving
# the temp file over it: mktemp creates its file with restrictive
# permissions, and a plain mv would silently apply them to the document.
if ! cp -p -- "$1" "$1~" ; then
	echo "$0: cannot create backup $1~; file left untouched" >&2
	exit 1
fi

if ! cat -- "$TMPFILE" > "$1" ; then
	echo "$0: cannot write $1; the original is saved as $1~" >&2
	exit 1
fi

# Test grep's exit status directly: "[ ! $? ]" only checks whether the
# string "$?" is empty, which it never is, so the warning could never fire.
if ! grep -qi -- 'encoding="UTF-8"' "$1" ; then
	echo "Warning: you should mention the new document encoding in an XML declaration" >&2
fi


