github的一些开源项目
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

317 lines
7.0 KiB

  1. #! /usr/bin/perl -w
  2. # Script to turn PCRE2 man pages into HTML
  3. # Subroutine to handle font changes and other escapes
  4. sub do_line {
  5. my($s) = $_[0];
  6. $s =~ s/</&#60;/g; # Deal with < and >
  7. $s =~ s/>/&#62;/g;
  8. $s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
  9. $s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;
  10. $s =~ s"\\e"\\"g;
  11. $s =~ s/(?<=Copyright )\(c\)/&copy;/g;
  12. $s;
  13. }
  14. # Subroutine to ensure not in a paragraph
  15. sub end_para {
  16. if ($inpara)
  17. {
  18. print TEMP "</PRE>\n" if ($inpre);
  19. print TEMP "</P>\n";
  20. }
  21. $inpara = $inpre = 0;
  22. $wrotetext = 0;
  23. }
  24. # Subroutine to start a new paragraph
  25. sub new_para {
  26. &end_para();
  27. print TEMP "<P>\n";
  28. $inpara = 1;
  29. }
  30. # Main program
  31. $innf = 0;
  32. $inpara = 0;
  33. $inpre = 0;
  34. $wrotetext = 0;
  35. $toc = 0;
  36. $ref = 1;
  37. while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
  38. {
  39. $toc = 1 if $ARGV[0] eq "-toc";
  40. shift;
  41. }
  42. # Initial output to STDOUT
  43. print <<End ;
  44. <html>
  45. <head>
  46. <title>$ARGV[0] specification</title>
  47. </head>
  48. <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
  49. <h1>$ARGV[0] man page</h1>
  50. <p>
  51. Return to the <a href="index.html">PCRE2 index page</a>.
  52. </p>
  53. <p>
  54. This page is part of the PCRE2 HTML documentation. It was generated
  55. automatically from the original man page. If there is any nonsense in it,
  56. please consult the man page, in case the conversion went wrong.
  57. <br>
  58. End
  59. print "<ul>\n" if ($toc);
  60. open(TEMP, ">/tmp/$$") || die "Can't open /tmp/$$ for output\n";
  61. while (<STDIN>)
  62. {
  63. # Handle lines beginning with a dot
  64. if (/^\./)
  65. {
  66. # Some of the PCRE2 man pages used to contain instances of .br. However,
  67. # they should have all been removed because they cause trouble in some
  68. # (other) automated systems that translate man pages to HTML. Complain if
  69. # we find .br or .in (another macro that is deprecated).
  70. if (/^\.br/ || /^\.in/)
  71. {
  72. print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
  73. print STDERR "*** $_\n";
  74. die "*** Processing abandoned\n";
  75. }
  76. # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.
  77. elsif (/^\.nf/)
  78. {
  79. $innf = 1;
  80. }
  81. elsif (/^\.fi/)
  82. {
  83. $innf = 0;
  84. }
  85. # Handling .sp is subtle. If it is inside a literal section, do nothing if
  86. # the next line is a non literal text line; similarly, if not inside a
  87. # literal section, do nothing if a literal follows, unless we are inside
  88. # a .nf/.fi section or about to enter one. The point being that the <pre>
  89. # and </pre> that delimit literal sections will do the spacing. Always skip
  90. # if no previous output.
  91. elsif (/^\.sp/)
  92. {
  93. if ($wrotetext)
  94. {
  95. $_ = <STDIN>;
  96. if ($inpre)
  97. {
  98. print TEMP "\n" if (/^[\s.]/);
  99. }
  100. else
  101. {
  102. print TEMP "<br>\n<br>\n" if ($innf || /^\.nf/ || !/^[\s.]/);
  103. }
  104. redo; # Now process the lookahead line we just read
  105. }
  106. }
  107. elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
  108. {
  109. &new_para();
  110. }
  111. elsif (/^\.SH\s*("?)(.*)\1/)
  112. {
  113. # Ignore the NAME section
  114. if ($2 =~ /^NAME\b/)
  115. {
  116. <STDIN>;
  117. next;
  118. }
  119. &end_para();
  120. my($title) = &do_line($2);
  121. if ($toc)
  122. {
  123. printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">$title</a>\n",
  124. $ref, $ref);
  125. printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">$title</a><br>\n",
  126. $ref);
  127. $ref++;
  128. }
  129. else
  130. {
  131. print TEMP "<br><b>\n$title\n</b><br>\n";
  132. }
  133. }
  134. elsif (/^\.SS\s*("?)(.*)\1/)
  135. {
  136. &end_para();
  137. my($title) = &do_line($2);
  138. print TEMP "<br><b>\n$title\n</b><br>\n";
  139. }
  140. elsif (/^\.B\s*(.*)/)
  141. {
  142. &new_para() if (!$inpara);
  143. $_ = &do_line($1);
  144. s/"(.*?)"/$1/g;
  145. print TEMP "<b>$_</b>\n";
  146. $wrotetext = 1;
  147. }
  148. elsif (/^\.I\s*(.*)/)
  149. {
  150. &new_para() if (!$inpara);
  151. $_ = &do_line($1);
  152. s/"(.*?)"/$1/g;
  153. print TEMP "<i>$_</i>\n";
  154. $wrotetext = 1;
  155. }
  156. # Remove the "AUTOMATICALLY GENERATED" warning from pcre2demo.3
  157. elsif (/^\.\\"AUTOMATICALLY GENERATED/) { next; }
  158. # A comment that starts "HREF" takes the next line as a name that
  159. # is turned into a hyperlink, using the text given, which might be
  160. # in a special font. If it ends in () or (digits) or punctuation, they
  161. # aren't part of the link.
  162. elsif (/^\.\\"\s*HREF/)
  163. {
  164. $_=<STDIN>;
  165. chomp;
  166. $_ = &do_line($_);
  167. $_ =~ s/\s+$//;
  168. $_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
  169. print TEMP "<a href=\"$1.html\">$_</a>\n";
  170. }
  171. # A comment that starts "HTML" inserts literal HTML
  172. elsif (/^\.\\"\s*HTML\s*(.*)/)
  173. {
  174. print TEMP $1;
  175. }
  176. # A comment that starts < inserts that HTML at the end of the
  177. # *next* input line - so as not to get a newline between them.
  178. elsif (/^\.\\"\s*(<.*>)/)
  179. {
  180. my($markup) = $1;
  181. $_=<STDIN>;
  182. chomp;
  183. $_ = &do_line($_);
  184. $_ =~ s/\s+$//;
  185. print TEMP "$_$markup\n";
  186. }
  187. # A comment that starts JOIN joins the next two lines together, with one
  188. # space between them. Then that line is processed. This is used in some
  189. # displays where two lines are needed for the "man" version. JOINSH works
  190. # the same, except that it assumes this is a shell command, so removes
  191. # continuation backslashes.
  192. elsif (/^\.\\"\s*JOIN(SH)?/)
  193. {
  194. my($one,$two);
  195. $one = <STDIN>;
  196. $two = <STDIN>;
  197. $one =~ s/\s*\\e\s*$// if (defined($1));
  198. chomp($one);
  199. $two =~ s/^\s+//;
  200. $_ = "$one $two";
  201. redo; # Process the joined lines
  202. }
  203. # .EX/.EE are used in the pcre2demo page to bracket the entire program,
  204. # which is unmodified except for turning backslash into "\e".
  205. elsif (/^\.EX\s*$/)
  206. {
  207. print TEMP "<PRE>\n";
  208. while (<STDIN>)
  209. {
  210. last if /^\.EE\s*$/;
  211. s/\\e/\\/g;
  212. s/&/&amp;/g;
  213. s/</&lt;/g;
  214. s/>/&gt;/g;
  215. print TEMP;
  216. }
  217. }
  218. # Ignore anything not recognized
  219. next;
  220. }
  221. # Line does not begin with a dot. Replace blank lines with new paragraphs
  222. if (/^\s*$/)
  223. {
  224. &end_para() if ($wrotetext);
  225. next;
  226. }
  227. # Convert fonts changes and output an ordinary line. Ensure that indented
  228. # lines are marked as literal.
  229. $_ = &do_line($_);
  230. &new_para() if (!$inpara);
  231. if (/^\s/)
  232. {
  233. if (!$inpre)
  234. {
  235. print TEMP "<pre>\n";
  236. $inpre = 1;
  237. }
  238. }
  239. elsif ($inpre)
  240. {
  241. print TEMP "</pre>\n";
  242. $inpre = 0;
  243. }
  244. # Add <br> to the end of a non-literal line if we are within .nf/.fi
  245. $_ .= "<br>\n" if (!$inpre && $innf);
  246. print TEMP;
  247. $wrotetext = 1;
  248. }
  249. # The TOC, if present, will have been written - terminate it
  250. print "</ul>\n" if ($toc);
  251. # Copy the remainder to the standard output
  252. close(TEMP);
  253. open(TEMP, "/tmp/$$") || die "Can't open /tmp/$$ for input\n";
  254. print while (<TEMP>);
  255. print <<End ;
  256. <p>
  257. Return to the <a href="index.html">PCRE2 index page</a>.
  258. </p>
  259. End
  260. close(TEMP);
  261. unlink("/tmp/$$");
  262. # End