# $Id: ~|^` @(#)htmlize.sed 1.6 2005/05/18 13:24:37 \ $
# Description: sed script for generating HTML from Internet-Draft or RFC
# Copyright (C) 2005 Bruce Lilly
# License (zlib/libpng license)
# This software is provided 'as-is', without any express or implied warranty. In
# no event will the authors be held liable for any damages arising from the
# use of this software.
#
# Permission is granted to anyone to use this software for any purpose, including
# commercial applications, and to alter it and redistribute it freely, subject
# to the following restrictions:
#
# 1. The origin of this software must not be misrepresented; you must not
#    claim that you wrote the original software. If you use this software
#    in a product, an acknowledgment in the product documentation would be
#    appreciated but is not required.
#
# 2. Altered source versions must be plainly marked as such, and must not
#    be misrepresented as being the original software.
#
# 3. This notice may not be removed or altered from any source distribution.
#
# NOTE(review): ALTERED COPY. This file was recovered from a copy in which all
# line breaks had been collapsed (so every command was swallowed by the leading
# '#' comment) and in which HTML entities had been decoded and HTML tags removed.
# Line structure and the character-entity replacements have been restored.
# The HTML markup (anchors, headings, rules) that the remaining replacements
# originally emitted could not be recovered; those rules are kept exactly as
# found and are marked below. As recovered they rewrite each match to itself,
# so they produce no markup until the replacement text is restored from a
# pristine copy of the script.

# basic HTML characters
# '&' must be escaped first so the '&' introduced by later entities is not
# escaped a second time.
s/&/\&amp;/g
s/</\&lt;/g
s/>/\&gt;/g

# additional HTML characters
s/opyright (C) /opyright \&copy; /g
# a lone "o" after optional leading spaces is treated as a list bullet
s/^\([ ]*\)o /\1\&bull; /

# heuristic (gross hack) to handle text references to sections (2nd rule repeated because some sed implementations don't handle overlap with g)
# Applies from the first line starting "1. " to the "Informative/Normative Reference" heading.
# NOTE(review): replacement markup lost; presumably wrapped \2 in a link to the section - confirm.
/^1[.] /,/^[IN][a-z]*ormative Reference/s@\(ection[s]* \| [Aa]ppendi[xc][es]* \|and \)\(\([A-Z]\|[1-9][0-9]*\)\([.][1-9][0-9]*\)*\)\([ .,;]\|$\)@\1\2\5@g
/^1[.] /,/^[IN][a-z]*ormative Reference/s@\([, ] \)\(\([A-Z]\|[1-9][0-9]*\)\([.][1-9][0-9]*\)\{1,\}\)\([ .,;]\|$\)@\1\2\5@g
/^1[.] /,/^[IN][a-z]*ormative Reference/s@\([, ] \)\(\([A-Z]\|[1-9][0-9]*\)\([.][1-9][0-9]*\)\{1,\}\)\([ .,;]\|$\)@\1\2\5@g

# references
# First rule: citations such as [N1] or [I2] in the text up to the references heading.
# Second rule: the first such tag per line inside the references section itself.
# NOTE(review): replacement markup lost (presumably link / link target) - confirm.
1,/^[A-Za-z][A-Za-z]*ormative Reference/s,\[\([IN][1-9][0-9]*\)\([^]]*\]*\),[\1\2,g
/^[IN][a-z]*ormative Reference/,/^Full Copyright Statement/s,\[\([IN][1-9][0-9]*\)\([^]]*\]*\),[\1\2,

# URI heuristics (more gross hacks)
# Applies from the "Status of " line to the end of input.
# Generic URI: scheme ":" "//" authority, path, optional "?query", optional "#fragment".
# NOTE(review): replacement markup lost; presumably wrapped \1 in a hyperlink - confirm.
/^Status of /,$s@\(\(\([^:/?#" ]\{1,\}\):\)\(//[^/?#" ]*\)\([^?#" ]*\)\([?][^#" ]*\)\{0,1\}\(#[^" ]*\)\{0,1\}\)@\1@g
# Mail address: local-part "@" followed by a dotted host name or a bracketed dotted quad.
# NOTE(review): replacement markup lost; presumably wrapped \1 in a mailto link - confirm.
/^Status of /,$s;\(\([A-Za-z0-9!'*^_`{}|~]\{1,\}\([-.A-Za-z0-9!'*^_`{}|~]\{1,\}\)*\)@\(\([A-Za-z0-9][-A-Za-z0-9]\{0,62\}\([.][A-Za-z0-9][-A-Za-z0-9]\{0,62\}\)*\)\|[[][1-2]\{0,1\}[0-9]\{1,2\}[.][1-2]\{0,1\}[0-9]\{1,2\}[.][1-2]\{0,1\}[0-9]\{1,2\}[.][1-2]\{0,1\}[0-9]\{1,2\}[]]\)\);\1;g
# NOTE(review): the next rule is disabled because it cannot be parsed as recovered:
# its replacement refers to \5 and \6 while only four groups remain in the pattern
# (the tag text inside the groups was stripped). It appears to have dealt with
# trailing punctuation ([.,:;]) around generated links - restore from a pristine copy.
#/^Status of /,$s@\(\)\([^<]*\)\([.,:;][.,:;]*\)\(\)@\1\3\4\6\5@g

# heuristics (still more gross hacks) for RFC and draft reference URIs
# Applies inside the references section only.
# NOTE(review): replacement markup lost; presumably linked the RFC number / draft name - confirm.
/^[IN][a-z]*ormative Reference/,/^Full Copyright Statement/s@ RFC \([0-9]\{1,\}\),@ RFC \1,@g
/^[IN][a-z]*ormative Reference/,/^Full Copyright Statement/s@\([( ]\{1,\}\)\(draft-[-a-z0-9.]\{1,\}\)\([), ]\{1,\}\)@\1\2\3@g

# TOC and section headings
# TOC entries: indented lines between "Table of Contents" and the first "1. " line.
# NOTE(review): replacement markup lost for all three rules below - confirm.
/^Table of Contents/,/^1[.] /s,^\([ ][ ]*\)\([A-Z0-9]\(\([.][^. ]\)*[^.]*\)*\)\(\(\([.][^.]\)*[^.]*\)*\),\1\2\5,
# Unnumbered headings: references sections and the author's/editor's address section.
/^1[.] /,/^Full Copyright Statement/s;^\(\(\(\(Inf\|N\)ormative Reference[s]\)\|[AE][ud][ti][ht]or['s]['s] .*Address[es]*\) *\)$;\1;
# Numbered section headings and "Appendix X." headings.
/^1[.] /,/^Full Copyright Statement/s;^\(\([1-9][0-9]*\([.][1-9][0-9]*\)*\|\([A-Z]\|[1-9][0-9]*\)\([.][1-9][0-9]*\)\{1,\}\|Appendix [A-Z]\)[.] .*\)$;\1;

# page separators
# NOTE(review): disabled because the rule cannot be parsed as recovered. The
# surviving text is "s, ," followed by a line break and ",": an unterminated
# substitution whose pattern is now a single space. Presumably the pattern was a
# page-break control character and the replacement was HTML that has been
# stripped - restore from a pristine copy before re-enabling.