#	$NetBSD: nanpa.sed,v 1.2 2006/12/25 18:39:48 wiz Exp $
#
# Parse HTML tables output by
#   http://docs.nanpa.com/cgi-bin/npa_reports/nanpa
# Specifically, for each HTML table row (<TR>),
# print the <TD> elements separated by colons.
#
# Overview: physical input lines are accumulated in the pattern space until
# one complete table row has been collected; the row is then reduced to
# "cell:cell:..." text.  A literal "$" character is used as a temporary
# in-band end-of-row marker while rows are being assembled.
#
# This could break on HTML comments.
#
:top
#	Strip ^Ms (carriage returns).
#	NOTE: "\r" needs a sed that understands that escape; with a sed that
#	does not, replace it with a literal carriage-return character.
s/\r//g
#	Join all lines with unterminated HTML tags: a "<" with no closing
#	">" before end of line means the tag continues on the next line.
/<[^>]*$/{
	N
	b top
}
#	Replace all </TR> with the EOL marker "$".
#	NOTE(review): the closing-tag text was reconstructed; it is matched
#	case-insensitively to agree with the <TR> patterns below -- confirm
#	against the upstream file.
s;</[Tt][Rr]>;$;g
#	Join lines with only <TR> (row opened, no cells yet on this line).
/<[Tt][Rr][^>]*>$/{
	N
	s/\n//g
	b top
}
#	Also, join all lines starting with <TR> that have no EOL marker yet,
#	i.e. the row is still open.
#	NOTE(review): the class is [TtRr] (one character), so this also
#	matches other tags beginning with T or R such as <TABLE> or <TD>;
#	kept exactly as found -- confirm whether [Tt][Rr] was intended.
/<[TtRr][^>]*>[^$]*$/{
	N
	s/\n//g
	b top
}
#	Remove EOL markers
s/\$$//
#	Remove lines not containing a <TR> (anything outside a table row).
/<[Tt][Rr][^>]*>/!d
#	Replace all <TD> with colon, swallowing whitespace around the tag.
s/[[:blank:]]*<[Tt][Dd][^>]*> */:/g
#	Strip all remaining HTML tags
s/<[^>]*>//g
#	Handle HTML characters
s/&nbsp;/ /g
#	Compress runs of spaces/tabs to a single space
s/[[:blank:]][[:blank:]]*/ /g
#	Strip the leading colon produced by the first <TD>
s/^://
#	Strip leading/trailing whitespace
s/^ //
s/ $//