# $Id: ~|^` @(#)htmlize.sed 1.6 2005/05/18 13:24:37 \ $
# Description: sed script for generating HTML from Internet-Draft or RFC
# Copyright (C) 2005 Bruce Lilly
# License (zlib/libpng license)
# This software is provided 'as-is', without any express or implied warranty. In
# no event will the authors be held liable for any damages arising from the
# use of this software.
#
# Permission is granted to anyone to use this software for any purpose, including
# commercial applications, and to alter it and redistribute it freely, subject
# to the following restrictions:
#
# 1. The origin of this software must not be misrepresented; you must not
# claim that you wrote the original software. If you use this software
# in a product, an acknowledgment in the product documentation would be
# appreciated but is not required.
#
# 2. Altered source versions must be plainly marked as such, and must not
# be misrepresented as being the original software.
#
# 3. This notice may not be removed or altered from any source distribution.
#
# basic HTML characters
# Escape the three characters that are significant in HTML markup so that the
# document text is shown literally by a browser.
#  - The ampersand rule must come first; otherwise the "&" that begins the
#    "&lt;" and "&gt;" entities produced below would itself be escaped again.
#  - In a sed replacement an unescaped "&" stands for the whole matched text,
#    so the literal ampersand that starts each entity is written "\&".
#  - The patterns use a plain "<" and ">"; "\<" and "\>" are avoided because
#    some sed implementations treat them as word-boundary operators.
s/&/\&amp;/g
s/</\&lt;/g
s/>/\&gt;/g
# additional HTML characters
# Both rules emit ASCII-only character entities, so the output does not depend
# on the encoding in which this script file is stored.  As above, "\&" is a
# literal ampersand in the replacement text.
#
# "Copyright (C) " / "copyright (C) " -> copyright-sign entity (every
# occurrence on the line; the leading "C"/"c" is left untouched).
s/opyright (C) /opyright \&copy; /g
# A line whose first non-blank text is "o " (the list marker used in the plain
# text documents) gets a bullet entity in place of the "o"; the leading
# spaces captured in \1 are preserved.
s/^\([ ]*\)o /\1\&bull; /
# heuristic (gross hack) to handle text references to sections (2nd rule repeated because some sed implementations don't handle overlap with g)
# All three rules apply only from the first line starting with "1. " through
# the first following line starting with "Informative Reference" or
# "Normative Reference" (any [IN][a-z]*ormative spelling).
#
# Rule 1: a section designator introduced by "ection "/"ections ",
# " Appendix "/" appendix "/" appendices " or "and ".
#   \1 = the introducing word (with its trailing space)
#   \2 = the designator: one capital letter or a number, followed by zero or
#        more ".<number>" components (\3 and \4 are sub-groups of \2)
#   \5 = the terminator: space, ".", ",", ";" or end of line
# Rules 2 and 3 (identical): a designator preceded by ", " or two spaces, which
# must contain at least one ".<number>" component so that bare numbers and
# letters in running text are not picked up.
#
# "\|" alternation is not part of POSIX basic regular expressions; these rules
# rely on a sed that supports it (e.g. GNU sed).
#
# NOTE(review): as written, each replacement "\1\2\5" re-emits exactly the text
# that was matched, so these rules leave every line unchanged.  The heading
# comment suggests \2 was meant to be wrapped in link markup -- confirm against
# a pristine copy of this script.
/^1[.] /,/^[IN][a-z]*ormative Reference/s@\(ection[s]* \| [Aa]ppendi[xc][es]* \|and \)\(\([A-Z]\|[1-9][0-9]*\)\([.][1-9][0-9]*\)*\)\([ .,;]\|$\)@\1\2\5@g
/^1[.] /,/^[IN][a-z]*ormative Reference/s@\([, ] \)\(\([A-Z]\|[1-9][0-9]*\)\([.][1-9][0-9]*\)\{1,\}\)\([ .,;]\|$\)@\1\2\5@g
/^1[.] /,/^[IN][a-z]*ormative Reference/s@\([, ] \)\(\([A-Z]\|[1-9][0-9]*\)\([.][1-9][0-9]*\)\{1,\}\)\([ .,;]\|$\)@\1\2\5@g
# references
# Both rules match a bracketed reference tag: "[" followed by "I" or "N" and a
# number without a leading zero (\1), then any run of characters other than
# "]" followed by zero or more "]" (\2).
#
# First rule: from line 1 through the first line that starts with letters
# followed by "ormative Reference" (the references heading); every tag on the
# line (g flag).
# Second rule: from a line starting with "Informative Reference" or
# "Normative Reference" (any [IN][a-z]*ormative spelling) through the line
# starting with "Full Copyright Statement"; only the first tag on each line
# (no g flag).
#
# NOTE(review): as written, the replacement "[\1\2" reproduces the matched text
# unchanged, so neither rule alters its input.  Presumably the first rule once
# produced a link to the reference entry and the second the corresponding
# link target -- confirm against a pristine copy of this script.
1,/^[A-Za-z][A-Za-z]*ormative Reference/s,\[\([IN][1-9][0-9]*\)\([^]]*\]*\),[\1\2,g
/^[IN][a-z]*ormative Reference/,/^Full Copyright Statement/s,\[\([IN][1-9][0-9]*\)\([^]]*\]*\),[\1\2,
# URI heuristics (more gross hacks)
# All three rules apply from the first line starting with "Status of " through
# the end of the input.
#
# Rule 1 (delimiter "@"): something shaped like a hierarchical URI.
#   \1 = the whole URI
#   \2 = scheme plus ":" (\3 = scheme alone; no ":", "/", "?", "#", '"', space)
#   \4 = "//" plus authority
#   \5 = path
#   \6 = optional "?query"
#   \7 = optional "#fragment"
# A double quote or a space always ends the match.
#
# Rule 2 (delimiter ";", because the pattern itself contains "@"): something
# shaped like an e-mail address -- a local part of letters, digits and
# !'*^_`{}|~ (with "-" and "." allowed after the first run), an "@", then
# either a dotted host name whose labels are 1 to 63 characters long or a
# bracketed dotted-quad such as [192.0.2.1].
#
# Rule 3: intended, judging by its character classes, to deal with trailing
# ".,:;" punctuation next to text that contains no "<".
#
# "\|" alternation is not part of POSIX basic regular expressions; rule 2
# relies on a sed that supports it (e.g. GNU sed).
#
# NOTE(review): as written, rules 1 and 2 replace the match with "\1", i.e.
# with itself, so they change nothing; presumably the replacement once wrapped
# \1 in link markup.
# NOTE(review): rule 3 defines only four groups (two of them empty) but its
# replacement refers to \5 and \6.  GNU sed rejects such a script with an
# "invalid reference" error, which stops the whole script from loading.  The
# empty groups presumably held literal markup that has been lost -- restore
# from a pristine copy of this script before use.
/^Status of /,$s@\(\(\([^:/?#" ]\{1,\}\):\)\(//[^/?#" ]*\)\([^?#" ]*\)\([?][^#" ]*\)\{0,1\}\(#[^" ]*\)\{0,1\}\)@\1@g
/^Status of /,$s;\(\([A-Za-z0-9!'*^_`{}|~]\{1,\}\([-.A-Za-z0-9!'*^_`{}|~]\{1,\}\)*\)@\(\([A-Za-z0-9][-A-Za-z0-9]\{0,62\}\([.][A-Za-z0-9][-A-Za-z0-9]\{0,62\}\)*\)\|[[][1-2]\{0,1\}[0-9]\{1,2\}[.][1-2]\{0,1\}[0-9]\{1,2\}[.][1-2]\{0,1\}[0-9]\{1,2\}[.][1-2]\{0,1\}[0-9]\{1,2\}[]]\)\);\1;g
/^Status of /,$s@\(\)\([^<]*\)\([.,:;][.,:;]*\)\(\)@\1\3\4\6\5@g
# heuristics (still more gross hacks) for RFC and draft reference URIs
# Both rules apply only inside the references part of the document: from a
# line starting with "Informative Reference" or "Normative Reference" (any
# [IN][a-z]*ormative spelling) through the line starting with
# "Full Copyright Statement".
#
# Rule 1: " RFC <digits>," -- \1 is the RFC number.
# Rule 2: an Internet-Draft file name.
#   \1 = one or more leading "(" or space characters
#   \2 = "draft-" followed by lower-case letters, digits, "-" and "."
#   \3 = one or more trailing ")", "," or space characters
#
# NOTE(review): as written, both replacements reproduce the matched text
# unchanged.  Given the heading above, they presumably once wrapped the RFC
# number and the draft name in links to the corresponding documents -- confirm
# against a pristine copy of this script.
/^[IN][a-z]*ormative Reference/,/^Full Copyright Statement/s@ RFC \([0-9]\{1,\}\),@ RFC \1,@g
/^[IN][a-z]*ormative Reference/,/^Full Copyright Statement/s@\([( ]\{1,\}\)\(draft-[-a-z0-9.]\{1,\}\)\([), ]\{1,\}\)@\1\2\3@g
# TOC and section headings
# Rule 1 (delimiter ","): table-of-contents entries, i.e. indented lines
# between the "Table of Contents" line and the first line starting with "1. ".
#   \1 = the leading spaces (at least one)
#   \2 = text beginning with a capital letter or digit
#   \5 = the rest of the matched text
#
# Rule 2 (delimiter ";"): unnumbered headings between the first "1. " line and
# "Full Copyright Statement" -- "Informative References",
# "Normative References", or an "Author's"/"Editor's"/"Authors'"/"Editors'"
# "... Address"/"... Addresses" line, optionally followed by trailing spaces.
# The whole line is \1.
#
# Rule 3 (delimiter ";"): numbered headings in the same range -- "<n>. ",
# "<n>.<m>. ", "<letter>.<m>. " or "Appendix <letter>. " followed by the
# title.  The whole line is \1.
#
# "\|" alternation is not part of POSIX basic regular expressions; rules 2
# and 3 rely on a sed that supports it (e.g. GNU sed).
#
# NOTE(review): as written, every replacement ("\1\2\5" and "\1") reproduces
# the matched text unchanged.  Presumably the TOC rule once emitted a link and
# the heading rules the matching link targets -- confirm against a pristine
# copy of this script.
/^Table of Contents/,/^1[.] /s,^\([ ][ ]*\)\([A-Z0-9]\(\([.][^. ]\)*[^.]*\)*\)\(\(\([.][^.]\)*[^.]*\)*\),\1\2\5,
/^1[.] /,/^Full Copyright Statement/s;^\(\(\(\(Inf\|N\)ormative Reference[s]\)\|[AE][ud][ti][ht]or['s]['s] .*Address[es]*\) *\)$;\1;
/^1[.] /,/^Full Copyright Statement/s;^\(\([1-9][0-9]*\([.][1-9][0-9]*\)*\|\([A-Z]\|[1-9][0-9]*\)\([.][1-9][0-9]*\)\{1,\}\|Appendix [A-Z]\)[.] .*\)$;\1;
# page separators
# A substitution using "," as its delimiter, written across two lines.
# NOTE(review): as it appears here the pattern between the first two commas is
# empty and the replacement consists only of a line break.  Presumably the
# pattern was the page-break character (form feed) used in the plain-text
# documents and the replacement was markup followed by an escaped newline --
# verify against a pristine copy of this script.  POSIX sed requires a newline
# inside a replacement to be preceded by a backslash, so this command is not
# expected to be accepted in its present form.
s,,
,