?????????? ?????????

?????????? ????????? - ??????????????? - /home/agenciai/public_html/cd38d8/Simple.zip

???????
PK��]"\�Rd�%��%�� JustPod.pmnu��[��use 5; package Pod::Simple::JustPod; # ABSTRACT: Pod::Simple formatter that extracts POD from a file containing # other things as well use strict; use warnings; use Pod::Simple::Methody (); our @ISA = ('Pod::Simple::Methody'); sub new { my $self = shift; my $new = $self->SUPER::new(@_); $new->accept_targets(''); $new->keep_encoding_directive(1); $new->preserve_whitespace(1); $new->complain_stderr(1); $new->_output_is_for_JustPod(1); return $new; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub check_that_all_is_closed { # Actually checks that the things we depend on being balanced in fact are, # so that we can continue in spit of pod errors my $self = shift; while ($self->{inL}) { $self->end_L(@_); } while ($self->{fcode_end} && @{$self->{fcode_end}}) { $self->_end_fcode(@_); } } sub handle_text { # Add text to the output buffer. This is skipped if within a L<>, as we use # the 'raw' attribute of that tag instead. $_[0]{buffer} .= $_[1] unless $_[0]{inL} ; } sub spacer { # Prints the white space following things like =head1. This is normally a # blank, unless BlackBox has told us otherwise. my ($self, $arg) = @_; return unless $arg; my $spacer = ($arg->{'~orig_spacer'}) ? $arg->{'~orig_spacer'} : " "; $self->handle_text($spacer); } sub _generic_start { # Called from tags like =head1, etc. my ($self, $text, $arg) = @_; $self->check_that_all_is_closed(); $self->handle_text($text); $self->spacer($arg); } sub start_Document { shift->_generic_start("=pod\n\n"); } sub start_head1 { shift->_generic_start('=head1', @_); } sub start_head2 { shift->_generic_start('=head2', @_); } sub start_head3 { shift->_generic_start('=head3', @_); } sub start_head4 { shift->_generic_start('=head4', @_); } sub start_head5 { shift->_generic_start('=head5', @_); } sub start_head6 { shift->_generic_start('=head6', @_); } sub start_encoding { shift->_generic_start('=encoding', @_); } # sub start_Para # sub start_Verbatim sub start_item_bullet { # Handle =item my ($self, $arg) = @_; $self->check_that_all_is_closed(); $self->handle_text('=item'); # It can be that they said simply '=item', and it is inferred that it is to # be a bullet. if (! $arg->{'~orig_content'}) { $self->handle_text("\n\n"); } else { $self->spacer($arg); if ($arg->{'~_freaky_para_hack'}) { # See Message Id <87y3gtcwa2.fsf@hope.eyrie.org> my $item_text = $arg->{'~orig_content'}; my $trailing = quotemeta $arg->{'~_freaky_para_hack'}; $item_text =~ s/$trailing$//; $self->handle_text($item_text); } else { $self->handle_text("\n\n"); } } } sub start_item_number { # Handle '=item 2' my ($self, $arg) = @_; $self->check_that_all_is_closed(); $self->handle_text("=item"); $self->spacer($arg); $self->handle_text("$arg->{'~orig_content'}\n\n"); } sub start_item_text { # Handle '=item foo bar baz' my ($self, $arg) = @_; $self->check_that_all_is_closed(); $self->handle_text('=item'); $self->spacer($arg); } sub _end_item { my $self = shift; $self->check_that_all_is_closed(); $self->emit; } end_item_bullet = _end_item; end_item_number = _end_item; end_item_text = _end_item; sub _start_over { # Handle =over my ($self, $arg) = @_; $self->check_that_all_is_closed(); $self->handle_text("=over"); # The =over amount is optional if ($arg->{'~orig_content'}) { $self->spacer($arg); $self->handle_text("$arg->{'~orig_content'}"); } $self->handle_text("\n\n"); } start_over_bullet = _start_over; start_over_number = _start_over; start_over_text = _start_over; start_over_block = _start_over; sub _end_over { my $self = shift; $self->check_that_all_is_closed(); $self->handle_text('=back'); $self->emit; } end_over_bullet = _end_over; end_over_number = _end_over; end_over_text = _end_over; end_over_block = _end_over; sub end_Document { my $self = shift; $self->emit; # Make sure buffer gets flushed print {$self->{'output_fh'} } "=cut\n" } sub _end_generic { my $self = shift; $self->check_that_all_is_closed(); $self->emit; } end_head1 = _end_generic; end_head2 = _end_generic; end_head3 = _end_generic; end_head4 = _end_generic; end_head5 = _end_generic; end_head6 = _end_generic; end_encoding = _end_generic; end_Para = _end_generic; end_Verbatim = _end_generic; sub _start_fcode { my ($type, $self, $flags) = @_; # How many brackets is set by BlackBox unless the count is 1 my $bracket_count = (exists $flags->{'~bracket_count'}) ? $flags->{'~bracket_count'} : 1; $self->handle_text($type . ( "<" x $bracket_count)); my $rspacer = ""; if ($bracket_count > 1) { my $lspacer = (exists $flags->{'~lspacer'}) ? $flags->{'~lspacer'} : " "; $self->handle_text($lspacer); $rspacer = (exists $flags->{'~rspacer'}) ? $flags->{'~rspacer'} : " "; } # BlackBox doesn't output things for for the ending code callbacks, so save # what we need. push @{$self->{'fcode_end'}}, [ $bracket_count, $rspacer ]; } sub start_B { _start_fcode('B', @_); } sub start_C { _start_fcode('C', @_); } sub start_E { _start_fcode('E', @_); } sub start_F { _start_fcode('F', @_); } sub start_I { _start_fcode('I', @_); } sub start_S { _start_fcode('S', @_); } sub start_X { _start_fcode('X', @_); } sub start_Z { _start_fcode('Z', @_); } sub _end_fcode { my $self = shift; my $fcode_end = pop @{$self->{'fcode_end'}}; my $bracket_count = 1; my $rspacer = ""; if (! defined $fcode_end) { # If BlackBox is working, this shouldn't # happen, but verify $self->whine($self->{line_count}, "Extra '>'"); } else { $bracket_count = $fcode_end->[0]; $rspacer = $fcode_end->[1]; } $self->handle_text($rspacer) if $bracket_count > 1; $self->handle_text(">" x $bracket_count); } end_B = _end_fcode; end_C = _end_fcode; end_E = _end_fcode; end_F = _end_fcode; end_I = _end_fcode; end_S = _end_fcode; end_X = _end_fcode; end_Z = _end_fcode; sub start_L { _start_fcode('L', @_); $_[0]->handle_text($_[1]->{raw}); $_[0]->{inL}++ } sub end_L { my $self = shift; $self->{inL}--; if ($self->{inL} < 0) { # If BlackBox is working, this shouldn't # happen, but verify $self->whine($self->{line_count}, "Extra '>' ending L<>"); $self->{inL} = 0; } $self->_end_fcode(@_); } sub emit { my $self = shift; if ($self->{buffer} ne "") { print { $self->{'output_fh'} } "",$self->{buffer} ,"\n\n"; $self->{buffer} = ""; } return; } 1; __END__ =head1 NAME Pod::Simple::JustPod -- just the Pod, the whole Pod, and nothing but the Pod =head1 SYNOPSIS my $infile = "mixed_code_and_pod.pm"; my $outfile = "just_the_pod.pod"; open my $fh, ">$outfile" or die "Can't write to $outfile: $!"; my $parser = Pod::Simple::JustPod->new(); $parser->output_fh($fh); $parser->parse_file($infile); close $fh or die "Can't close $outfile: $!"; =head1 DESCRIPTION This class returns a copy of its input, translated into Perl's internal encoding (UTF-8), and with all the non-Pod lines removed. This is a subclass of L<Pod::Simple::Methody> and inherits all its methods. And since, that in turn is a subclass of L<Pod::Simple>, you can use any of its methods. This means you can output to a string instead of a file, or you can parse from an array. This class strives to return the Pod lines of the input completely unchanged, except for any necessary translation into Perl's internal encoding, and it makes no effort to return trailing spaces on lines; these likely will be stripped. If the input pod is well-formed with no warnings nor errors generated, the extracted pod should generate the same documentation when formatted by a Pod formatter as the original file does. By default, warnings are output to STDERR =head1 SEE ALSO L<Pod::Simple>, L<Pod::Simple::Methody> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the L<mailto:pod-people@perl.org> mail list. Send an empty email to L<mailto:pod-people-subscribe@perl.org> to subscribe. This module is managed in an open GitHub repository, L<https://github.com/theory/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/theory/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to L<mailto:<bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back Pod::Simple::JustPod was developed by John SJ Anderson C<genehack@genehack.org>, with contributions from Karl Williamson C<khw@cpan.org>. =cut PK��]"\��Subclassing.podnu��[��=head1 NAME Pod::Simple::Subclassing -- write a formatter as a Pod::Simple subclass =head1 SYNOPSIS package Pod::SomeFormatter; use Pod::Simple; @ISA = qw(Pod::Simple); $VERSION = '1.01'; use strict; sub _handle_element_start { my($parser, $element_name, $attr_hash_r) = @_; ... } sub _handle_element_end { my($parser, $element_name, $attr_hash_r) = @_; # NOTE: $attr_hash_r is only present when $element_name is "over" or "begin" # The remaining code excerpts will mostly ignore this $attr_hash_r, as it is # mostly useless. It is documented where "over-" and "begin" events are # documented. ... } sub _handle_text { my($parser, $text) = @_; ... } 1; =head1 DESCRIPTION This document is about using Pod::Simple to write a Pod processor, generally a Pod formatter. If you just want to know about using an existing Pod formatter, instead see its documentation and see also the docs in L<Pod::Simple>. B<The zeroeth step> in writing a Pod formatter is to make sure that there isn't already a decent one in CPAN. See L<http://search.cpan.org/>, and run a search on the name of the format you want to render to. Also consider joining the Pod People list L<http://lists.perl.org/showlist.cgi?name=pod-people> and asking whether anyone has a formatter for that format -- maybe someone cobbled one together but just hasn't released it. B<The first step> in writing a Pod processor is to read L<perlpodspec>, which contains information on writing a Pod parser (which has been largely taken care of by Pod::Simple), but also a lot of requirements and recommendations for writing a formatter. B<The second step> is to actually learn the format you're planning to format to -- or at least as much as you need to know to represent Pod, which probably isn't much. B<The third step> is to pick which of Pod::Simple's interfaces you want to use: =over =item Pod::Simple The basic L<Pod::Simple> interface that uses C<_handle_element_start()>, C<_handle_element_end()> and C<_handle_text()>. =item Pod::Simple::Methody The L<Pod::Simple::Methody> interface is event-based, similar to that of L<HTML::Parser> or L<XML::Parser>'s "Handlers". =item Pod::Simple::PullParser L<Pod::Simple::PullParser> provides a token-stream interface, sort of like L<HTML::TokeParser>'s interface. =item Pod::Simple::SimpleTree L<Pod::Simple::SimpleTree> provides a simple tree interface, rather like L<XML::Parser>'s "Tree" interface. Users familiar with XML handling will be comfortable with this interface. Users interested in outputting XML, should look into the modules that produce an XML representation of the Pod stream, notably L<Pod::Simple::XMLOutStream>; you can feed the output of such a class to whatever XML parsing system you are most at home with. =back B<The last step> is to write your code based on how the events (or tokens, or tree-nodes, or the XML, or however you're parsing) will map to constructs in the output format. Also be sure to consider how to escape text nodes containing arbitrary text, and what to do with text nodes that represent preformatted text (from verbatim sections). =head1 Events TODO intro... mention that events are supplied for implicits, like for missing >'s In the following section, we use XML to represent the event structure associated with a particular construct. That is, an opening tag represents the element start, the attributes of that opening tag are the attributes given to the callback, and the closing tag represents the end element. Three callback methods must be supplied by a class extending L<Pod::Simple> to receive the corresponding event: =over =item C<< $parser->_handle_element_start( I<element_name>, I<attr_hashref> ) >> =item C<< $parser->_handle_element_end( I<element_name> ) >> =item C<< $parser->_handle_text( I<text_string> ) >> =back Here's the comprehensive list of values you can expect as I<element_name> in your implementation of C<_handle_element_start> and C<_handle_element_end>:: =over =item events with an element_name of Document Parsing a document produces this event structure: <Document start_line="543"> ...all events... </Document> The value of the I<start_line> attribute will be the line number of the first Pod directive in the document. If there is no Pod in the given document, then the event structure will be this: <Document contentless="1" start_line="543"> </Document> In that case, the value of the I<start_line> attribute will not be meaningful; under current implementations, it will probably be the line number of the last line in the file. =item events with an element_name of Para Parsing a plain (non-verbatim, non-directive, non-data) paragraph in a Pod document produces this event structure: <Para start_line="543"> ...all events in this paragraph... </Para> The value of the I<start_line> attribute will be the line number of the start of the paragraph. For example, parsing this paragraph of Pod: The value of the I<start_line> attribute will be the line number of the start of the paragraph. produces this event structure: <Para start_line="129"> The value of the <I> start_line </I> attribute will be the line number of the first Pod directive in the document. </Para> =item events with an element_name of B, C, F, or I. Parsing a BE<lt>...E<gt> formatting code (or of course any of its semantically identical syntactic variants S<BE<lt>E<lt> ... E<gt>E<gt>>, or S<BE<lt>E<lt>E<lt>E<lt> ... E<gt>E<gt>E<gt>E<gt>>, etc.) produces this event structure: <B> ...stuff... </B> Currently, there are no attributes conveyed. Parsing C, F, or I codes produce the same structure, with only a different element name. If your parser object has been set to accept other formatting codes, then they will be presented like these B/C/F/I codes -- i.e., without any attributes. =item events with an element_name of S Normally, parsing an SE<lt>...E<gt> sequence produces this event structure, just as if it were a B/C/F/I code: <S> ...stuff... </S> However, Pod::Simple (and presumably all derived parsers) offers the C<nbsp_for_S> option which, if enabled, will suppress all S events, and instead change all spaces in the content to non-breaking spaces. This is intended for formatters that output to a format that has no code that means the same as SE<lt>...E<gt>, but which has a code/character that means non-breaking space. =item events with an element_name of X Normally, parsing an XE<lt>...E<gt> sequence produces this event structure, just as if it were a B/C/F/I code: <X> ...stuff... </X> However, Pod::Simple (and presumably all derived parsers) offers the C<nix_X_codes> option which, if enabled, will suppress all X events and ignore their content. For formatters/processors that don't use X events, this is presumably quite useful. =item events with an element_name of L Because the LE<lt>...E<gt> is the most complex construct in the language, it should not surprise you that the events it generates are the most complex in the language. Most of complexity is hidden away in the attribute values, so for those of you writing a Pod formatter that produces a non-hypertextual format, you can just ignore the attributes and treat an L event structure like a formatting element that (presumably) doesn't actually produce a change in formatting. That is, the content of the L event structure (as opposed to its attributes) is always what text should be displayed. There are, at first glance, three kinds of L links: URL, man, and pod. When a LE<lt>I<some_url>E<gt> code is parsed, it produces this event structure: <L content-implicit="yes" raw="that_url" to="that_url" type="url"> that_url </L> The C<type="url"> attribute is always specified for this type of L code. For example, this Pod source: L<http://www.perl.com/CPAN/authors/> produces this event structure: <L content-implicit="yes" raw="http://www.perl.com/CPAN/authors/" to="http://www.perl.com/CPAN/authors/" type="url"> http://www.perl.com/CPAN/authors/ </L> When a LE<lt>I<manpage(section)>E<gt> code is parsed (and these are fairly rare and not terribly useful), it produces this event structure: <L content-implicit="yes" raw="manpage(section)" to="manpage(section)" type="man"> manpage(section) </L> The C<type="man"> attribute is always specified for this type of L code. For example, this Pod source: L<crontab(5)> produces this event structure: <L content-implicit="yes" raw="crontab(5)" to="crontab(5)" type="man"> crontab(5) </L> In the rare cases where a man page link has a section specified, that text appears in a I<section> attribute. For example, this Pod source: L<crontab(5)/"ENVIRONMENT"> will produce this event structure: <L content-implicit="yes" raw="crontab(5)/"ENVIRONMENT"" section="ENVIRONMENT" to="crontab(5)" type="man"> "ENVIRONMENT" in crontab(5) </L> In the rare case where the Pod document has code like LE<lt>I<sometext>\|I<manpage(section)>E<gt>, then the I<sometext> will appear as the content of the element, the I<manpage(section)> text will appear only as the value of the I<to> attribute, and there will be no C<content-implicit="yes"> attribute (whose presence means that the Pod parser had to infer what text should appear as the link text -- as opposed to cases where that attribute is absent, which means that the Pod parser did I<not> have to infer the link text, because that L code explicitly specified some link text.) For example, this Pod source: L<hell itself!\|crontab(5)> will produce this event structure: <L raw="hell itself!\|crontab(5)" to="crontab(5)" type="man"> hell itself! </L> The last type of L structure is for links to/within Pod documents. It is the most complex because it can have a I<to> attribute, I<or> a I<section> attribute, or both. The C<type="pod"> attribute is always specified for this type of L code. In the most common case, the simple case of a LE<lt>podpageE<gt> code produces this event structure: <L content-implicit="yes" raw="podpage" to="podpage" type="pod"> podpage </L> For example, this Pod source: L<Net::Ping> produces this event structure: <L content-implicit="yes" raw="Net::Ping" to="Net::Ping" type="pod"> Net::Ping </L> In cases where there is link-text explicitly specified, it is to be found in the content of the element (and not the attributes), just as with the LE<lt>I<sometext>\|I<manpage(section)>E<gt> case discussed above. For example, this Pod source: L<Perl Error Messages\|perldiag> produces this event structure: <L raw="Perl Error Messages\|perldiag" to="perldiag" type="pod"> Perl Error Messages </L> In cases of links to a section in the current Pod document, there is a I<section> attribute instead of a I<to> attribute. For example, this Pod source: L</"Member Data"> produces this event structure: <L content-implicit="yes" raw="/"Member Data"" section="Member Data" type="pod"> "Member Data" </L> As another example, this Pod source: L<the various attributes\|/"Member Data"> produces this event structure: <L raw="the various attributes\|/"Member Data"" section="Member Data" type="pod"> the various attributes </L> In cases of links to a section in a different Pod document, there are both a I<section> attribute and a L<to> attribute. For example, this Pod source: L<perlsyn/"Basic BLOCKs and Switch Statements"> produces this event structure: <L content-implicit="yes" raw="perlsyn/"Basic BLOCKs and Switch Statements"" section="Basic BLOCKs and Switch Statements" to="perlsyn" type="pod"> "Basic BLOCKs and Switch Statements" in perlsyn </L> As another example, this Pod source: L<SWITCH statements\|perlsyn/"Basic BLOCKs and Switch Statements"> produces this event structure: <L raw="SWITCH statements\|perlsyn/"Basic BLOCKs and Switch Statements"" section="Basic BLOCKs and Switch Statements" to="perlsyn" type="pod"> SWITCH statements </L> Incidentally, note that we do not distinguish between these syntaxes: L</"Member Data"> L<"Member Data"> L</Member Data> L<Member Data> [deprecated syntax] That is, they all produce the same event structure (for the most part), namely: <L content-implicit="yes" raw="$depends_on_syntax" section="Member Data" type="pod"> "Member Data" </L> The I<raw> attribute depends on what the raw content of the C<LE<lt>E<gt>> is, so that is why the event structure is the same "for the most part". If you have not guessed it yet, the I<raw> attribute contains the raw, original, unescaped content of the C<LE<lt>E<gt>> formatting code. In addition to the examples above, take notice of the following event structure produced by the following C<LE<lt>E<gt>> formatting code. L<click B<here>\|page/About the C<-M> switch> <L raw="click B<here>\|page/About the C<-M> switch" section="About the -M switch" to="page" type="pod"> click B<here> </L> Specifically, notice that the formatting codes are present and unescaped in I<raw>. There is a known bug in the I<raw> attribute where any surrounding whitespace is condensed into a single ' '. For example, given LE<60> linkE<62>, I<raw> will be " link". =item events with an element_name of E or Z While there are Pod codes EE<lt>...E<gt> and ZE<lt>E<gt>, these I<do not> produce any E or Z events -- that is, there are no such events as E or Z. =item events with an element_name of Verbatim When a Pod verbatim paragraph (AKA "codeblock") is parsed, it produces this event structure: <Verbatim start_line="543" xml:space="preserve"> ...text... </Verbatim> The value of the I<start_line> attribute will be the line number of the first line of this verbatim block. The I<xml:space> attribute is always present, and always has the value "preserve". The text content will have tabs already expanded. =item events with an element_name of head1 .. head4 When a "=head1 ..." directive is parsed, it produces this event structure: <head1> ...stuff... </head1> For example, a directive consisting of this: =head1 Options to C<new> et al. will produce this event structure: <head1 start_line="543"> Options to <C> new </C> et al. </head1> "=head2" through "=head4" directives are the same, except for the element names in the event structure. =item events with an element_name of encoding In the default case, the events corresponding to C<=encoding> directives are not emitted. They are emitted if C<keep_encoding_directive> is true. In that case they produce event structures like L</"events with an element_name of head1 .. head4"> above. =item events with an element_name of over-bullet When an "=over ... Z<>=back" block is parsed where the items are a bulleted list, it will produce this event structure: <over-bullet indent="4" start_line="543"> <item-bullet start_line="545"> ...Stuff... </item-bullet> ...more item-bullets... </over-bullet fake-closer="1"> The attribute I<fake-closer> is only present if it is a true value; it is not present if it is a false value. It is shown in the above example to illustrate where the attribute is (in the B<closing> tag). It signifies that the C<=over> did not have a matching C<=back>, and thus Pod::Simple had to create a fake closer. For example, this Pod source: =over =item Something =back Would produce an event structure that does B<not> have the I<fake-closer> attribute, whereas this Pod source: =over =item * Gasp! An unclosed =over block! would. The rest of the over-* examples will not demonstrate this attribute, but they all can have it. See L<Pod::Checker>'s source for an example of this attribute being used. The value of the I<indent> attribute is whatever value is after the "=over" directive, as in "=over 8". If no such value is specified in the directive, then the I<indent> attribute has the value "4". For example, this Pod source: =over =item * Stuff =item * Bar I<baz>! =back produces this event structure: <over-bullet indent="4" start_line="10"> <item-bullet start_line="12"> Stuff </item-bullet> <item-bullet start_line="14"> Bar <I>baz</I>! </item-bullet> </over-bullet> =item events with an element_name of over-number When an "=over ... Z<>=back" block is parsed where the items are a numbered list, it will produce this event structure: <over-number indent="4" start_line="543"> <item-number number="1" start_line="545"> ...Stuff... </item-number> ...more item-number... </over-bullet> This is like the "over-bullet" event structure; but note that the contents are "item-number" instead of "item-bullet", and note that they will have a "number" attribute, which some formatters/processors may ignore (since, for example, there's no need for it in HTML when producing an "<UL><LI>...</LI>...</UL>" structure), but which any processor may use. Note that the values for the I<number> attributes of "item-number" elements in a given "over-number" area I<will> start at 1 and go up by one each time. If the Pod source doesn't follow that order (even though it really should!), whatever numbers it has will be ignored (with the correct values being put in the I<number> attributes), and an error message might be issued to the user. =item events with an element_name of over-text These events are somewhat unlike the other over-* structures, as far as what their contents are. When an "=over ... Z<>=back" block is parsed where the items are a list of text "subheadings", it will produce this event structure: <over-text indent="4" start_line="543"> <item-text> ...stuff... </item-text> ...stuff (generally Para or Verbatim elements)... <item-text> ...more item-text and/or stuff... </over-text> The I<indent> and I<fake-closer> attributes are as with the other over-* events. For example, this Pod source: =over =item Foo Stuff =item Bar I<baz>! Quux =back produces this event structure: <over-text indent="4" start_line="20"> <item-text start_line="22"> Foo </item-text> <Para start_line="24"> Stuff </Para> <item-text start_line="26"> Bar <I> baz </I> ! </item-text> <Para start_line="28"> Quux </Para> </over-text> =item events with an element_name of over-block These events are somewhat unlike the other over-* structures, as far as what their contents are. When an "=over ... Z<>=back" block is parsed where there are no items, it will produce this event structure: <over-block indent="4" start_line="543"> ...stuff (generally Para or Verbatim elements)... </over-block> The I<indent> and I<fake-closer> attributes are as with the other over-* events. For example, this Pod source: =over For cutting off our trade with all parts of the world For transporting us beyond seas to be tried for pretended offenses He is at this time transporting large armies of foreign mercenaries to complete the works of death, desolation and tyranny, already begun with circumstances of cruelty and perfidy scarcely paralleled in the most barbarous ages, and totally unworthy the head of a civilized nation. =back will produce this event structure: <over-block indent="4" start_line="2"> <Para start_line="4"> For cutting off our trade with all parts of the world </Para> <Para start_line="6"> For transporting us beyond seas to be tried for pretended offenses </Para> <Para start_line="8"> He is at this time transporting large armies of [...more text...] </Para> </over-block> =item events with an element_name of over-empty B<Note: These events are only triggered if C<parse_empty_lists()> is set to a true value.> These events are somewhat unlike the other over-* structures, as far as what their contents are. When an "=over ... Z<>=back" block is parsed where there is no content, it will produce this event structure: <over-empty indent="4" start_line="543"> </over-empty> The I<indent> and I<fake-closer> attributes are as with the other over-* events. For example, this Pod source: =over =over =back =back will produce this event structure: <over-block indent="4" start_line="1"> <over-empty indent="4" start_line="3"> </over-empty> </over-block> Note that the outer C<=over> is a block because it has no C<=item>s but still has content: the inner C<=over>. The inner C<=over>, in turn, is completely empty, and is treated as such. =item events with an element_name of item-bullet See L</"events with an element_name of over-bullet">, above. =item events with an element_name of item-number See L</"events with an element_name of over-number">, above. =item events with an element_name of item-text See L</"events with an element_name of over-text">, above. =item events with an element_name of for TODO... =item events with an element_name of Data TODO... =back =head1 More Pod::Simple Methods Pod::Simple provides a lot of methods that aren't generally interesting to the end user of an existing Pod formatter, but some of which you might find useful in writing a Pod formatter. They are listed below. The first several methods (the accept_* methods) are for declaring the capabilities of your parser, notably what C<=for I<targetname>> sections it's interested in, what extra NE<lt>...E<gt> codes it accepts beyond the ones described in the I<perlpod>. =over =item C<< $parser->accept_targets( I<SOMEVALUE> ) >> As the parser sees sections like: =for html <img src="fig1.jpg"> or =begin html <img src="fig1.jpg"> =end html ...the parser will ignore these sections unless your subclass has specified that it wants to see sections targeted to "html" (or whatever the formatter name is). If you want to process all sections, even if they're not targeted for you, call this before you start parsing: $parser->accept_targets(''); =item C<< $parser->accept_targets_as_text( I<SOMEVALUE> ) >> This is like accept_targets, except that it specifies also that the content of sections for this target should be treated as Pod text even if the target name in "=for I<targetname>" doesn't start with a ":". At time of writing, I don't think you'll need to use this. =item C<< $parser->accept_codes( I<Codename>, I<Codename>... ) >> This tells the parser that you accept additional formatting codes, beyond just the standard ones (I B C L F S X, plus the two weird ones you don't actually see in the parse tree, Z and E). For example, to also accept codes "N", "R", and "W": $parser->accept_codes( qw( N R W ) ); B<TODO: document how this interacts with =extend, and long element names> =item C<< $parser->accept_directive_as_data( I<directive_name> ) >> =item C<< $parser->accept_directive_as_verbatim( I<directive_name> ) >> =item C<< $parser->accept_directive_as_processed( I<directive_name> ) >> In the unlikely situation that you need to tell the parser that you will accept additional directives ("=foo" things), you need to first set the parser to treat its content as data (i.e., not really processed at all), or as verbatim (mostly just expanding tabs), or as processed text (parsing formatting codes like BE<lt>...E<gt>). For example, to accept a new directive "=method", you'd presumably use: $parser->accept_directive_as_processed("method"); so that you could have Pod lines like: =method I<$whatever> thing B<um> Making up your own directives breaks compatibility with other Pod formatters, in a way that using "=for I<target> ..." lines doesn't; however, you may find this useful if you're making a Pod superset format where you don't need to worry about compatibility. =item C<< $parser->nbsp_for_S( I<BOOLEAN> ); >> Setting this attribute to a true value (and by default it is false) will turn "SE<lt>...E<gt>" sequences into sequences of words separated by C<\xA0> (non-breaking space) characters. For example, it will take this: I like S<Dutch apple pie>, don't you? and treat it as if it were: I like DutchE<nbsp>appleE<nbsp>pie, don't you? This is handy for output formats that don't have anything quite like an "SE<lt>...E<gt>" code, but which do have a code for non-breaking space. There is currently no method for going the other way; but I can probably provide one upon request. =item C<< $parser->version_report() >> This returns a string reporting the $VERSION value from your module (and its classname) as well as the $VERSION value of Pod::Simple. Note that L<perlpodspec> requires output formats (wherever possible) to note this detail in a comment in the output format. For example, for some kind of SGML output format: print OUT "<!-- \n", $parser->version_report, "\n -->"; =item C<< $parser->pod_para_count() >> This returns the count of Pod paragraphs seen so far. =item C<< $parser->line_count() >> This is the current line number being parsed. But you might find the "line_number" event attribute more accurate, when it is present. =item C<< $parser->nix_X_codes( I<SOMEVALUE> ) >> This attribute, when set to a true value (and it is false by default) ignores any "XE<lt>...E<gt>" sequences in the document being parsed. Many formats don't actually use the content of these codes, so have no reason to process them. =item C<< $parser->keep_encoding_directive( I<SOMEVALUE> ) >> This attribute, when set to a true value (it is false by default) will keep C<=encoding> and its content in the event structure. Most formats don't actually need to process the content of an C<=encoding> directive, even when this directive sets the encoding and the processor makes use of the encoding information. Indeed, it is possible to know the encoding without processing the directive content. =item C<< $parser->merge_text( I<SOMEVALUE> ) >> This attribute, when set to a true value (and it is false by default) makes sure that only one event (or token, or node) will be created for any single contiguous sequence of text. For example, consider this somewhat contrived example: I just LOVE Z<>hotE<32>apple pie! When that is parsed and events are about to be called on it, it may actually seem to be four different text events, one right after another: one event for "I just LOVE ", one for "hot", one for " ", and one for "apple pie!". But if you have merge_text on, then you're guaranteed that it will be fired as one text event: "I just LOVE hot apple pie!". =item C<< $parser->code_handler( I<CODE_REF> ) >> This specifies code that should be called when a code line is seen (i.e., a line outside of the Pod). Normally this is undef, meaning that no code should be called. If you provide a routine, it should start out like this: sub get_code_line { # or whatever you'll call it my($line, $line_number, $parser) = @_; ... } Note, however, that sometimes the Pod events aren't processed in exactly the same order as the code lines are -- i.e., if you have a file with Pod, then code, then more Pod, sometimes the code will be processed (via whatever you have code_handler call) before the all of the preceding Pod has been processed. =item C<< $parser->cut_handler( I<CODE_REF> ) >> This is just like the code_handler attribute, except that it's for "=cut" lines, not code lines. The same caveats apply. "=cut" lines are unlikely to be interesting, but this is included for completeness. =item C<< $parser->pod_handler( I<CODE_REF> ) >> This is just like the code_handler attribute, except that it's for "=pod" lines, not code lines. The same caveats apply. "=pod" lines are unlikely to be interesting, but this is included for completeness. =item C<< $parser->whiteline_handler( I<CODE_REF> ) >> This is just like the code_handler attribute, except that it's for lines that are seemingly blank but have whitespace (" " and/or "\t") on them, not code lines. The same caveats apply. These lines are unlikely to be interesting, but this is included for completeness. =item C<< $parser->whine( I<linenumber>, I<complaint string> ) >> This notes a problem in the Pod, which will be reported in the "Pod Errors" section of the document and/or sent to STDERR, depending on the values of the attributes C<no_whining>, C<no_errata_section>, and C<complain_stderr>. =item C<< $parser->scream( I<linenumber>, I<complaint string> ) >> This notes an error like C<whine> does, except that it is not suppressible with C<no_whining>. This should be used only for very serious errors. =item C<< $parser->source_dead(1) >> This aborts parsing of the current document, by switching on the flag that indicates that EOF has been seen. In particularly drastic cases, you might want to do this. It's rather nicer than just calling C<die>! =item C<< $parser->hide_line_numbers( I<SOMEVALUE> ) >> Some subclasses that indiscriminately dump event attributes (well, except for ones beginning with "~") can use this object attribute for refraining to dump the "start_line" attribute. =item C<< $parser->no_whining( I<SOMEVALUE> ) >> This attribute, if set to true, will suppress reports of non-fatal error messages. The default value is false, meaning that complaints I<are> reported. How they get reported depends on the values of the attributes C<no_errata_section> and C<complain_stderr>. =item C<< $parser->no_errata_section( I<SOMEVALUE> ) >> This attribute, if set to true, will suppress generation of an errata section. The default value is false -- i.e., an errata section will be generated. =item C<< $parser->complain_stderr( I<SOMEVALUE> ) >> This attribute, if set to true will send complaints to STDERR. The default value is false -- i.e., complaints do not go to STDERR. =item C<< $parser->bare_output( I<SOMEVALUE> ) >> Some formatter subclasses use this as a flag for whether output should have prologue and epilogue code omitted. For example, setting this to true for an HTML formatter class should omit the "<html><head><title>...</title><body>..." prologue and the "</body></html>" epilogue. If you want to set this to true, you should probably also set C<no_whining> or at least C<no_errata_section> to true. =item C<< $parser->preserve_whitespace( I<SOMEVALUE> ) >> If you set this attribute to a true value, the parser will try to preserve whitespace in the output. This means that such formatting conventions as two spaces after periods will be preserved by the parser. This is primarily useful for output formats that treat whitespace as significant (such as text or roff, but not HTML). =item C<< $parser->parse_empty_lists( I<SOMEVALUE> ) >> If this attribute is set to true, the parser will not ignore empty C<=over>/C<=back> blocks. The type of C<=over> will be I<empty>, documented above, L<events with an element_name of over-empty>. =back =head1 SEE ALSO L<Pod::Simple> -- event-based Pod-parsing framework L<Pod::Simple::Methody> -- like Pod::Simple, but each sort of event calls its own method (like C<start_head3>) L<Pod::Simple::PullParser> -- a Pod-parsing framework like Pod::Simple, but with a token-stream interface L<Pod::Simple::SimpleTree> -- a Pod-parsing framework like Pod::Simple, but with a tree interface L<Pod::Simple::Checker> -- a simple Pod::Simple subclass that reads documents, and then makes a plaintext report of any errors found in the document L<Pod::Simple::DumpAsXML> -- for dumping Pod documents as tidily indented XML, showing each event on its own line L<Pod::Simple::XMLOutStream> -- dumps a Pod document as XML (without introducing extra whitespace as Pod::Simple::DumpAsXML does). L<Pod::Simple::DumpAsText> -- for dumping Pod documents as tidily indented text, showing each event on its own line L<Pod::Simple::LinkSection> -- class for objects representing the values of the TODO and TODO attributes of LE<lt>...E<gt> elements L<Pod::Escapes> -- the module that Pod::Simple uses for evaluating EE<lt>...E<gt> content L<Pod::Simple::Text> -- a simple plaintext formatter for Pod L<Pod::Simple::TextContent> -- like Pod::Simple::Text, but makes no effort for indent or wrap the text being formatted L<Pod::Simple::HTML> -- a simple HTML formatter for Pod L<perlpod\|perlpod> L<perlpodspec\|perlpodspec> L<perldoc> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item * Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =for notes Hm, my old podchecker version (1.2) says: * WARNING: node 'http://search.cpan.org/' contains non-escaped \| or / at line 38 in file Subclassing.pod * WARNING: node 'http://lists.perl.org/showlist.cgi?name=pod-people' contains non-escaped \| or / at line 41 in file Subclassing.pod Yes, L<...> is hard. =cut PK��]"\+x�O�d��d�� PullParser.pmnu��[��require 5; package Pod::Simple::PullParser; $VERSION = '3.42'; use Pod::Simple (); BEGIN {@ISA = ('Pod::Simple')} use strict; use Carp (); use Pod::Simple::PullParserStartToken; use Pod::Simple::PullParserEndToken; use Pod::Simple::PullParserTextToken; BEGIN { DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG } __PACKAGE__->_accessorize( 'source_fh', # the filehandle we're reading from 'source_scalar_ref', # the scalarref we're reading from 'source_arrayref', # the arrayref we're reading from ); #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ # # And here is how we implement a pull-parser on top of a push-parser... sub filter { my($self, $source) = @_; $self = $self->new unless ref $self; $source = STDIN{IO} unless defined $source; $self->set_source($source); $self->output_fh(STDOUT{IO}); $self->run; # define run() in a subclass if you want to use filter()! return $self; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub parse_string_document { my $this = shift; $this->set_source(\ $_[0]); $this->run; } sub parse_file { my($this, $filename) = @_; $this->set_source($filename); $this->run; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # In case anyone tries to use them: sub run { use Carp (); if( __PACKAGE__ eq ref($_[0]) \|\| $_[0]) { # I'm not being subclassed! Carp::croak "You can call run() only on subclasses of " . __PACKAGE__; } else { Carp::croak join '', "You can't call run() because ", ref($_[0]) \|\| $_[0], " didn't define a run() method"; } } sub parse_lines { use Carp (); Carp::croak "Use set_source with ", __PACKAGE__, " and subclasses, not parse_lines"; } sub parse_line { use Carp (); Carp::croak "Use set_source with ", __PACKAGE__, " and subclasses, not parse_line"; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub new { my $class = shift; my $self = $class->SUPER::new(@_); die "Couldn't construct for $class" unless $self; $self->{'token_buffer'} \|\|= []; $self->{'start_token_class'} \|\|= 'Pod::Simple::PullParserStartToken'; $self->{'text_token_class'} \|\|= 'Pod::Simple::PullParserTextToken'; $self->{'end_token_class'} \|\|= 'Pod::Simple::PullParserEndToken'; DEBUG > 1 and print STDERR "New pullparser object: $self\n"; return $self; } # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ sub get_token { my $self = shift; DEBUG > 1 and print STDERR "\nget_token starting up on $self.\n"; DEBUG > 2 and print STDERR " Items in token-buffer (", scalar( @{ $self->{'token_buffer'} } ) , ") :\n", map( " " . $_->dump . "\n", @{ $self->{'token_buffer'} } ), @{ $self->{'token_buffer'} } ? '' : ' (no tokens)', "\n" ; until( @{ $self->{'token_buffer'} } ) { DEBUG > 3 and print STDERR "I need to get something into my empty token buffer...\n"; if($self->{'source_dead'}) { DEBUG and print STDERR "$self 's source is dead.\n"; push @{ $self->{'token_buffer'} }, undef; } elsif(exists $self->{'source_fh'}) { my @lines; my $fh = $self->{'source_fh'} \|\| Carp::croak('You have to call set_source before you can call get_token'); DEBUG and print STDERR "$self 's source is filehandle $fh.\n"; # Read those many lines at a time for(my $i = Pod::Simple::MANY_LINES; $i--;) { DEBUG > 3 and print STDERR " Fetching a line from source filehandle $fh...\n"; local $/ = $Pod::Simple::NL; push @lines, scalar(<$fh>); # readline DEBUG > 3 and print STDERR " Line is: ", defined($lines[-1]) ? $lines[-1] : "<undef>\n"; unless( defined $lines[-1] ) { DEBUG and print STDERR "That's it for that source fh! Killing.\n"; delete $self->{'source_fh'}; # so it can be GC'd last; } # but pass thru the undef, which will set source_dead to true # TODO: look to see if $lines[-1] is =encoding, and if so, # do horribly magic things } if(DEBUG > 8) { print STDERR " I've gotten ", scalar(@lines), " lines:\n"; foreach my $l (@lines) { if(defined $l) { print STDERR " line {$l}\n"; } else { print STDERR " line undef\n"; } } print STDERR "* end of ", scalar(@lines), " lines\n"; } $self->SUPER::parse_lines(@lines); } elsif(exists $self->{'source_arrayref'}) { DEBUG and print STDERR "$self 's source is arrayref $self->{'source_arrayref'}, with ", scalar(@{$self->{'source_arrayref'}}), " items left in it.\n"; DEBUG > 3 and print STDERR " Fetching ", Pod::Simple::MANY_LINES, " lines.\n"; $self->SUPER::parse_lines( splice @{ $self->{'source_arrayref'} }, 0, Pod::Simple::MANY_LINES ); unless( @{ $self->{'source_arrayref'} } ) { DEBUG and print STDERR "That's it for that source arrayref! Killing.\n"; $self->SUPER::parse_lines(undef); delete $self->{'source_arrayref'}; # so it can be GC'd } # to make sure that an undef is always sent to signal end-of-stream } elsif(exists $self->{'source_scalar_ref'}) { DEBUG and print STDERR "$self 's source is scalarref $self->{'source_scalar_ref'}, with ", length(${ $self->{'source_scalar_ref'} }) - (pos(${ $self->{'source_scalar_ref'} }) \|\| 0), " characters left to parse.\n"; DEBUG > 3 and print STDERR " Fetching a line from source-string...\n"; if( ${ $self->{'source_scalar_ref'} } =~ m/([^\n\r])((?:\r?\n)?)/g ) { #print(">> $1\n"), $self->SUPER::parse_lines($1) if length($1) or length($2) or pos( ${ $self->{'source_scalar_ref'} }) != length( ${ $self->{'source_scalar_ref'} }); # I.e., unless it's a zero-length "empty line" at the very # end of "foo\nbar\n" (i.e., between the \n and the EOS). } else { # that's the end. Byebye $self->SUPER::parse_lines(undef); delete $self->{'source_scalar_ref'}; DEBUG and print STDERR "That's it for that source scalarref! Killing.\n"; } } else { die "What source??"; } } DEBUG and print STDERR "get_token about to return ", Pod::Simple::pretty( @{$self->{'token_buffer'}} ? $self->{'token_buffer'}[-1] : undef ), "\n"; return shift @{$self->{'token_buffer'}}; # that's an undef if empty } sub unget_token { my $self = shift; DEBUG and print STDERR "Ungetting ", scalar(@_), " tokens: ", @_ ? "@_\n" : "().\n"; foreach my $t (@_) { Carp::croak "Can't unget that, because it's not a token -- it's undef!" unless defined $t; Carp::croak "Can't unget $t, because it's not a token -- it's a string!" unless ref $t; Carp::croak "Can't unget $t, because it's not a token object!" unless UNIVERSAL::can($t, 'type'); } unshift @{$self->{'token_buffer'}}, @_; DEBUG > 1 and print STDERR "Token buffer now has ", scalar(@{$self->{'token_buffer'}}), " items in it.\n"; return; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ # $self->{'source_filename'} = $source; sub set_source { my $self = shift @_; return $self->{'source_fh'} unless @_; Carp::croak("Cannot assign new source to pull parser; create a new instance, instead") if $self->{'source_fh'} \|\| $self->{'source_scalar_ref'} \|\| $self->{'source_arrayref'}; my $handle; if(!defined $_[0]) { Carp::croak("Can't use empty-string as a source for set_source"); } elsif(ref(\( $_[0] )) eq 'GLOB') { $self->{'source_filename'} = '' . ($handle = $_[0]); DEBUG and print STDERR "$self 's source is glob $_[0]\n"; # and fall thru } elsif(ref( $_[0] ) eq 'SCALAR') { $self->{'source_scalar_ref'} = $_[0]; DEBUG and print STDERR "$self 's source is scalar ref $_[0]\n"; return; } elsif(ref( $_[0] ) eq 'ARRAY') { $self->{'source_arrayref'} = $_[0]; DEBUG and print STDERR "$self 's source is array ref $_[0]\n"; return; } elsif(ref $_[0]) { $self->{'source_filename'} = '' . ($handle = $_[0]); DEBUG and print STDERR "$self 's source is fh-obj $_[0]\n"; } elsif(!length $_[0]) { Carp::croak("Can't use empty-string as a source for set_source"); } else { # It's a filename! DEBUG and print STDERR "$self 's source is filename $_[0]\n"; { local PODSOURCE; open(PODSOURCE, "<$_[0]") \|\| Carp::croak "Can't open $_[0]: $!"; $handle = PODSOURCE{IO}; } $self->{'source_filename'} = $_[0]; DEBUG and print STDERR " Its name is $_[0].\n"; # TODO: file-discipline things here! } $self->{'source_fh'} = $handle; DEBUG and print STDERR " Its handle is $handle\n"; return 1; } # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ sub get_title_short { shift->get_short_title(@_) } # alias sub get_short_title { my $title = shift->get_title(@_); $title = $1 if $title =~ m/^(\S{1,60})\s+--?\s+./s; # turn "Foo::Bar -- bars for your foo" into "Foo::Bar" return $title; } sub get_title { shift->_get_titled_section( 'NAME', max_token => 50, desperate => 1, @_) } sub get_version { shift->_get_titled_section( 'VERSION', max_token => 400, accept_verbatim => 1, max_content_length => 3_000, @_, ); } sub get_description { shift->_get_titled_section( 'DESCRIPTION', max_token => 400, max_content_length => 3_000, @_, ) } sub get_authors { shift->get_author(@_) } # a harmless alias sub get_author { my $this = shift; # Max_token is so high because these are # typically at the end of the document: $this->_get_titled_section('AUTHOR' , max_token => 10_000, @_) \|\| $this->_get_titled_section('AUTHORS', max_token => 10_000, @_); } #-------------------------------------------------------------------------- sub _get_titled_section { # Based on a get_title originally contributed by Graham Barr my($self, $titlename, %options) = (@_); my $max_token = delete $options{'max_token'}; my $desperate_for_title = delete $options{'desperate'}; my $accept_verbatim = delete $options{'accept_verbatim'}; my $max_content_length = delete $options{'max_content_length'}; my $nocase = delete $options{'nocase'}; $max_content_length = 120 unless defined $max_content_length; Carp::croak( "Unknown " . ((1 == keys %options) ? "option: " : "options: ") . join " ", map "[$_]", sort keys %options ) if keys %options; my %content_containers; $content_containers{'Para'} = 1; if($accept_verbatim) { $content_containers{'Verbatim'} = 1; $content_containers{'VerbatimFormatted'} = 1; } my $token_count = 0; my $title; my @to_unget; my $state = 0; my $depth = 0; Carp::croak "What kind of titlename is \"$titlename\"?!" unless defined $titlename and $titlename =~ m/^[A-Z ]{1,60}$/s; #sanity my $titlename_re = quotemeta($titlename); my $head1_text_content; my $para_text_content; my $skipX; while( ++$token_count <= ($max_token \|\| 1_000_000) and defined(my $token = $self->get_token) ) { push @to_unget, $token; if ($state == 0) { # seeking =head1 if( $token->is_start and $token->tagname eq 'head1' ) { DEBUG and print STDERR " Found head1. Seeking content...\n"; ++$state; $head1_text_content = ''; } } elsif($state == 1) { # accumulating text until end of head1 if( $token->is_text ) { unless ($skipX) { DEBUG and print STDERR " Adding \"", $token->text, "\" to head1-content.\n"; $head1_text_content .= $token->text; } } elsif( $token->is_tagname('X') ) { # We're going to want to ignore X<> stuff. $skipX = $token->is_start; DEBUG and print STDERR +($skipX ? 'Start' : 'End'), 'ing ignoring of X<> tag'; } elsif( $token->is_end and $token->tagname eq 'head1' ) { DEBUG and print STDERR " Found end of head1. Considering content...\n"; $head1_text_content = uc $head1_text_content if $nocase; if($head1_text_content eq $titlename or $head1_text_content =~ m/$$titlename_re$/s # We accept "=head1 Nomen Modularis (NAME)" for sake of i18n ) { DEBUG and print STDERR " Yup, it was $titlename. Seeking next para-content...\n"; ++$state; } elsif( $desperate_for_title # if we're so desperate we'll take the first # =head1's content as a title and $head1_text_content =~ m/\S/ and $head1_text_content !~ m/^[ A-Z]+$/s and $head1_text_content !~ m/$(?: NAME \| TITLE \| VERSION \| AUTHORS? \| DESCRIPTION \| SYNOPSIS \| COPYRIGHT \| LICENSE \| NOTES? \| FUNCTIONS? \| METHODS? \| CAVEATS? \| BUGS? \| SEE\ ALSO \| SWITCHES \| ENVIRONMENT )$/sx # avoid accepting things like =head1 Thingy Thongy (DESCRIPTION) and ($max_content_length ? (length($head1_text_content) <= $max_content_length) # sanity : 1) ) { # Looks good; trim it ($title = $head1_text_content) =~ s/\s+$//; DEBUG and print STDERR " It looks titular: \"$title\".\n\n Using that.\n"; last; } else { --$state; DEBUG and print STDERR " Didn't look titular ($head1_text_content).\n", "\n Dropping back to seeking-head1-content mode...\n"; } } } elsif($state == 2) { # seeking start of para (which must immediately follow) if($token->is_start and $content_containers{ $token->tagname }) { DEBUG and print STDERR " Found start of Para. Accumulating content...\n"; $para_text_content = ''; ++$state; } else { DEBUG and print " Didn't see an immediately subsequent start-Para. Reseeking H1\n"; $state = 0; } } elsif($state == 3) { # accumulating text until end of Para if( $token->is_text ) { DEBUG and print STDERR " Adding \"", $token->text, "\" to para-content.\n"; $para_text_content .= $token->text; # and keep looking } elsif( $token->is_end and $content_containers{ $token->tagname } ) { DEBUG and print STDERR " Found end of Para. Considering content: ", $para_text_content, "\n"; if( $para_text_content =~ m/\S/ and ($max_content_length ? (length($para_text_content) <= $max_content_length) : 1) ) { # Some minimal sanity constraints, I think. DEBUG and print STDERR " It looks contentworthy, I guess. Using it.\n"; $title = $para_text_content; last; } else { DEBUG and print STDERR " Doesn't look at all contentworthy!\n Giving up.\n"; undef $title; last; } } } else { die "IMPOSSIBLE STATE $state!\n"; # should never happen } } # Put it all back! $self->unget_token(@to_unget); if(DEBUG) { if(defined $title) { print STDERR " Returning title <$title>\n" } else { print STDERR "Returning title <>\n" } } return '' unless defined $title; $title =~ s/^\s+//; return $title; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ # # Methods that actually do work at parse-time: sub _handle_element_start { my $self = shift; # leaving ($element_name, $attr_hash_r) DEBUG > 2 and print STDERR "++ $_[0] (", map("<$_> ", %{$_[1]}), ")\n"; push @{ $self->{'token_buffer'} }, $self->{'start_token_class'}->new(@_); return; } sub _handle_text { my $self = shift; # leaving ($text) DEBUG > 2 and print STDERR "== $_[0]\n"; push @{ $self->{'token_buffer'} }, $self->{'text_token_class'}->new(@_); return; } sub _handle_element_end { my $self = shift; # leaving ($element_name); DEBUG > 2 and print STDERR "-- $_[0]\n"; push @{ $self->{'token_buffer'} }, $self->{'end_token_class'}->new(@_); return; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1; __END__ =head1 NAME Pod::Simple::PullParser -- a pull-parser interface to parsing Pod =head1 SYNOPSIS my $parser = SomePodProcessor->new; $parser->set_source( "whatever.pod" ); $parser->run; Or: my $parser = SomePodProcessor->new; $parser->set_source( $some_filehandle_object ); $parser->run; Or: my $parser = SomePodProcessor->new; $parser->set_source( \$document_source ); $parser->run; Or: my $parser = SomePodProcessor->new; $parser->set_source( \@document_lines ); $parser->run; And elsewhere: require 5; package SomePodProcessor; use strict; use base qw(Pod::Simple::PullParser); sub run { my $self = shift; Token: while(my $token = $self->get_token) { ...process each token... } } =head1 DESCRIPTION This class is for using Pod::Simple to build a Pod processor -- but one that uses an interface based on a stream of token objects, instead of based on events. This is a subclass of L<Pod::Simple> and inherits all its methods. A subclass of Pod::Simple::PullParser should define a C<run> method that calls C<< $token = $parser->get_token >> to pull tokens. See the source for Pod::Simple::RTF for an example of a formatter that uses Pod::Simple::PullParser. =head1 METHODS =over =item my $token = $parser->get_token This returns the next token object (which will be of a subclass of L<Pod::Simple::PullParserToken>), or undef if the parser-stream has hit the end of the document. =item $parser->unget_token( $token ) =item $parser->unget_token( $token1, $token2, ... ) This restores the token object(s) to the front of the parser stream. =back The source has to be set before you can parse anything. The lowest-level way is to call C<set_source>: =over =item $parser->set_source( $filename ) =item $parser->set_source( $filehandle_object ) =item $parser->set_source( \$document_source ) =item $parser->set_source( \@document_lines ) =back Or you can call these methods, which Pod::Simple::PullParser has defined to work just like Pod::Simple's same-named methods: =over =item $parser->parse_file(...) =item $parser->parse_string_document(...) =item $parser->filter(...) =item $parser->parse_from_file(...) =back For those to work, the Pod-processing subclass of Pod::Simple::PullParser has to have defined a $parser->run method -- so it is advised that all Pod::Simple::PullParser subclasses do so. See the Synopsis above, or the source for Pod::Simple::RTF. Authors of formatter subclasses might find these methods useful to call on a parser object that you haven't started pulling tokens from yet: =over =item my $title_string = $parser->get_title This tries to get the title string out of $parser, by getting some tokens, and scanning them for the title, and then ungetting them so that you can process the token-stream from the beginning. For example, suppose you have a document that starts out: =head1 NAME Hoo::Boy::Wowza -- Stuff B<wow> yeah! $parser->get_title on that document will return "Hoo::Boy::Wowza -- Stuff wow yeah!". If the document starts with: =head1 Name Hoo::Boy::W00t -- Stuff B<w00t> yeah! Then you'll need to pass the C<nocase> option in order to recognize "Name": $parser->get_title(nocase => 1); In cases where get_title can't find the title, it will return empty-string (""). =item my $title_string = $parser->get_short_title This is just like get_title, except that it returns just the modulename, if the title seems to be of the form "SomeModuleName -- description". For example, suppose you have a document that starts out: =head1 NAME Hoo::Boy::Wowza -- Stuff B<wow> yeah! then $parser->get_short_title on that document will return "Hoo::Boy::Wowza". But if the document starts out: =head1 NAME Hooboy, stuff B<wow> yeah! then $parser->get_short_title on that document will return "Hooboy, stuff wow yeah!". If the document starts with: =head1 Name Hoo::Boy::W00t -- Stuff B<w00t> yeah! Then you'll need to pass the C<nocase> option in order to recognize "Name": $parser->get_short_title(nocase => 1); If the title can't be found, then get_short_title returns empty-string (""). =item $author_name = $parser->get_author This works like get_title except that it returns the contents of the "=head1 AUTHOR\n\nParagraph...\n" section, assuming that that section isn't terribly long. To recognize a "=head1 Author\n\nParagraph\n" section, pass the C<nocase> option: $parser->get_author(nocase => 1); (This method tolerates "AUTHORS" instead of "AUTHOR" too.) =item $description_name = $parser->get_description This works like get_title except that it returns the contents of the "=head1 DESCRIPTION\n\nParagraph...\n" section, assuming that that section isn't terribly long. To recognize a "=head1 Description\n\nParagraph\n" section, pass the C<nocase> option: $parser->get_description(nocase => 1); =item $version_block = $parser->get_version This works like get_title except that it returns the contents of the "=head1 VERSION\n\n[BIG BLOCK]\n" block. Note that this does NOT return the module's C<$VERSION>!! To recognize a "=head1 Version\n\n[BIG BLOCK]\n" section, pass the C<nocase> option: $parser->get_version(nocase => 1); =back =head1 NOTE You don't actually I<have> to define a C<run> method. If you're writing a Pod-formatter class, you should define a C<run> just so that users can call C<parse_file> etc, but you don't I<have> to. And if you're not writing a formatter class, but are instead just writing a program that does something simple with a Pod::PullParser object (and not an object of a subclass), then there's no reason to bother subclassing to add a C<run> method. =head1 SEE ALSO L<Pod::Simple> L<Pod::Simple::PullParserToken> -- and its subclasses L<Pod::Simple::PullParserStartToken>, L<Pod::Simple::PullParserTextToken>, and L<Pod::Simple::PullParserEndToken>. L<HTML::TokeParser>, which inspired this. =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut JUNK: sub _old_get_title { # some witchery in here my $self = $_[0]; my $title; my @to_unget; while(1) { push @to_unget, $self->get_token; unless(defined $to_unget[-1]) { # whoops, short doc! pop @to_unget; last; } DEBUG and print STDERR "-Got token ", $to_unget[-1]->dump, "\n"; (DEBUG and print STDERR "Too much in the buffer.\n"), last if @to_unget > 25; # sanity my $pattern = ''; if( #$to_unget[-1]->type eq 'end' #and $to_unget[-1]->tagname eq 'Para' #and ($pattern = join('', map {; ($_->type eq 'start') ? ("<" . $_->tagname .">") : ($_->type eq 'end' ) ? ("</". $_->tagname .">") : ($_->type eq 'text' ) ? ($_->text =~ m<^([A-Z]+)$>s ? $1 : 'X') : "BLORP" } @to_unget )) =~ m{<head1>NAME</head1><Para>(X\|</?[BCIFLS]>)+</Para>$}s ) { # Whee, it fits the pattern DEBUG and print STDERR "Seems to match =head1 NAME pattern.\n"; $title = ''; foreach my $t (reverse @to_unget) { last if $t->type eq 'start' and $t->tagname eq 'Para'; $title = $t->text . $title if $t->type eq 'text'; } undef $title if $title =~ m<^\s$>; # make sure it's contentful! last; } elsif ($pattern =~ m{<head(\d)>(.+)</head\d>$} and !( $1 eq '1' and $2 eq 'NAME' ) ) { # Well, it fits a fallback pattern DEBUG and print STDERR "Seems to match NAMEless pattern.\n"; $title = ''; foreach my $t (reverse @to_unget) { last if $t->type eq 'start' and $t->tagname =~ m/^head\d$/s; $title = $t->text . $title if $t->type eq 'text'; } undef $title if $title =~ m<^\s$>; # make sure it's contentful! last; } else { DEBUG and $pattern and print STDERR "Leading pattern: $pattern\n"; } } # Put it all back: $self->unget_token(@to_unget); if(DEBUG) { if(defined $title) { print STDERR " Returning title <$title>\n" } else { print STDERR "Returning title <>\n" } } return '' unless defined $title; return $title; } PK��]"\�'5�� SimpleTree.pmnu��[�� require 5; package Pod::Simple::SimpleTree; use strict; use Carp (); use Pod::Simple (); use vars qw( $ATTR_PAD @ISA $VERSION $SORT_ATTRS); $VERSION = '3.42'; BEGIN { @ISA = ('Pod::Simple'); DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG; } __PACKAGE__->_accessorize( 'root', # root of the tree ); #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub _handle_element_start { # self, tagname, attrhash DEBUG > 2 and print STDERR "Handling $_[1] start-event\n"; my $x = [$_[1], $_[2]]; if($_[0]{'_currpos'}) { push @{ $_[0]{'_currpos'}[0] }, $x; # insert in parent's child-list unshift @{ $_[0]{'_currpos'} }, $x; # prefix to stack } else { DEBUG and print STDERR " And oo, it gets to be root!\n"; $_[0]{'_currpos'} = [ $_[0]{'root'} = $x ]; # first event! set to stack, and set as root. } DEBUG > 3 and print STDERR "Stack is now: ", join(">", map $_->[0], @{$_[0]{'_currpos'}}), "\n"; return; } sub _handle_element_end { # self, tagname DEBUG > 2 and print STDERR "Handling $_[1] end-event\n"; shift @{$_[0]{'_currpos'}}; DEBUG > 3 and print STDERR "Stack is now: ", join(">", map $_->[0], @{$_[0]{'_currpos'}}), "\n"; return; } sub _handle_text { # self, text DEBUG > 2 and print STDERR "Handling $_[1] text-event\n"; push @{ $_[0]{'_currpos'}[0] }, $_[1]; return; } # A bit of evil from the black box... please avert your eyes, kind souls. sub _traverse_treelet_bit { DEBUG > 2 and print STDERR "Handling $_[1] paragraph event\n"; my $self = shift; push @{ $self->{'_currpos'}[0] }, [@_]; return; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1; __END__ =head1 NAME Pod::Simple::SimpleTree -- parse Pod into a simple parse tree =head1 SYNOPSIS % cat ptest.pod =head1 PIE I like B<pie>! % perl -MPod::Simple::SimpleTree -MData::Dumper -e \ "print Dumper(Pod::Simple::SimpleTree->new->parse_file(shift)->root)" \ ptest.pod $VAR1 = [ 'Document', { 'start_line' => 1 }, [ 'head1', { 'start_line' => 1 }, 'PIE' ], [ 'Para', { 'start_line' => 3 }, 'I like ', [ 'B', {}, 'pie' ], '!' ] ]; =head1 DESCRIPTION This class is of interest to people writing a Pod processor/formatter. This class takes Pod and parses it, returning a parse tree made just of arrayrefs, and hashrefs, and strings. This is a subclass of L<Pod::Simple> and inherits all its methods. This class is inspired by XML::Parser's "Tree" parsing-style, although it doesn't use exactly the same LoL format. =head1 METHODS At the end of the parse, call C<< $parser->root >> to get the tree's top node. =head1 Tree Contents Every element node in the parse tree is represented by an arrayref of the form: C<[ I<elementname>, \%attributes, I<...subnodes...> ]>. See the example tree dump in the Synopsis, above. Every text node in the tree is represented by a simple (non-ref) string scalar. So you can test C<ref($node)> to see whether you have an element node or just a text node. The top node in the tree is C<[ 'Document', \%attributes, I<...subnodes...> ]> =head1 SEE ALSO L<Pod::Simple> L<perllol> L<The "Tree" subsubsection in XML::Parser\|XML::Parser/"Tree"> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\�8�M��DumpAsXML.pmnu��[�� require 5; package Pod::Simple::DumpAsXML; $VERSION = '3.42'; use Pod::Simple (); BEGIN {@ISA = ('Pod::Simple')} use strict; use Carp (); use Text::Wrap qw(wrap); BEGIN { DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG } sub new { my $self = shift; my $new = $self->SUPER::new(@_); $new->{'output_fh'} \|\|= STDOUT{IO}; $new->accept_codes('VerbatimFormatted'); $new->keep_encoding_directive(1); return $new; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ sub _handle_element_start { # ($self, $element_name, $attr_hash_r) my $fh = $_[0]{'output_fh'}; my($key, $value); DEBUG and print STDERR "++ $_[1]\n"; print $fh ' ' x ($_[0]{'indent'} \|\| 0), "<", $_[1]; foreach my $key (sort keys %{$_[2]}) { unless($key =~ m/^~/s) { next if $key eq 'start_line' and $_[0]{'hide_line_numbers'}; _xml_escape($value = $_[2]{$key}); print $fh ' ', $key, '="', $value, '"'; } } print $fh ">\n"; $_[0]{'indent'}++; return; } sub _handle_text { DEBUG and print STDERR "== \"$_[1]\"\n"; if(length $_[1]) { my $indent = ' ' x $_[0]{'indent'}; my $text = $_[1]; _xml_escape($text); local $Text::Wrap::huge = 'overflow'; $text = wrap('', $indent, $text); print {$_[0]{'output_fh'}} $indent, $text, "\n"; } return; } sub _handle_element_end { DEBUG and print STDERR "-- $_[1]\n"; print {$_[0]{'output_fh'}} ' ' x --$_[0]{'indent'}, "</", $_[1], ">\n"; return; } # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . sub _xml_escape { foreach my $x (@_) { # Escape things very cautiously: if ($] ge 5.007_003) { $x =~ s/([^-\n\t !\#\$\%\\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg; } else { # Is broken for non-ASCII platforms on early perls $x =~ s/([^-\n\t !\#\$\%\\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg; } # Yes, stipulate the list without a range, so that this can work right on # all charsets that this module happens to run under. } return; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1; __END__ =head1 NAME Pod::Simple::DumpAsXML -- turn Pod into XML =head1 SYNOPSIS perl -MPod::Simple::DumpAsXML -e \ "exit Pod::Simple::DumpAsXML->filter(shift)->any_errata_seen" \ thingy.pod =head1 DESCRIPTION Pod::Simple::DumpAsXML is a subclass of L<Pod::Simple> that parses Pod and turns it into indented and wrapped XML. This class is of interest to people writing Pod formatters based on Pod::Simple. Pod::Simple::DumpAsXML inherits methods from L<Pod::Simple>. =head1 SEE ALSO L<Pod::Simple::XMLOutStream> is rather like this class. Pod::Simple::XMLOutStream's output is space-padded in a way that's better for sending to an XML processor (that is, it has no ignorable whitespace). But Pod::Simple::DumpAsXML's output is much more human-readable, being (more-or-less) one token per line, with line-wrapping. L<Pod::Simple::DumpAsText> is rather like this class, except that it doesn't dump with XML syntax. Try them and see which one you like best! L<Pod::Simple>, L<Pod::Simple::DumpAsXML> The older libraries L<Pod::PXML>, L<Pod::XML>, L<Pod::SAX> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item * Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\{�2��2��PullParserStartToken.pmnu��[�� require 5; package Pod::Simple::PullParserStartToken; use Pod::Simple::PullParserToken (); use strict; use vars qw(@ISA $VERSION); @ISA = ('Pod::Simple::PullParserToken'); $VERSION = '3.42'; sub new { # Class->new(tagname, optional_attrhash); my $class = shift; return bless ['start', @_], ref($class) \|\| $class; } # Purely accessors: sub tagname { (@_ == 2) ? ($_[0][1] = $_[1]) : $_[0][1] } sub tag { shift->tagname(@_) } sub is_tagname { $_[0][1] eq $_[1] } sub is_tag { shift->is_tagname(@_) } sub attr_hash { $_[0][2] \|\|= {} } sub attr { if(@_ == 2) { # Reading: $token->attr('attrname') ${$_[0][2] \|\| return undef}{ $_[1] }; } elsif(@_ > 2) { # Writing: $token->attr('attrname', 'newval') ${$_[0][2] \|\|= {}}{ $_[1] } = $_[2]; } else { require Carp; Carp::croak( 'usage: $object->attr("val") or $object->attr("key", "newval")'); return undef; } } 1; __END__ =head1 NAME Pod::Simple::PullParserStartToken -- start-tokens from Pod::Simple::PullParser =head1 SYNOPSIS (See L<Pod::Simple::PullParser>) =head1 DESCRIPTION When you do $parser->get_token on a L<Pod::Simple::PullParser> object, you might get an object of this class. This is a subclass of L<Pod::Simple::PullParserToken> and inherits all its methods, and adds these methods: =over =item $token->tagname This returns the tagname for this start-token object. For example, parsing a "=head1 ..." line will give you a start-token with the tagname of "head1", token(s) for its content, and then an end-token with the tagname of "head1". =item $token->tagname(I<somestring>) This changes the tagname for this start-token object. You probably won't need to do this. =item $token->tag(...) A shortcut for $token->tagname(...) =item $token->is_tag(I<somestring>) or $token->is_tagname(I<somestring>) These are shortcuts for C<< $token->tag() eq I<somestring> >> =item $token->attr(I<attrname>) This returns the value of the I<attrname> attribute for this start-token object, or undef. For example, parsing a LZ<><Foo/"Bar"> link will produce a start-token with a "to" attribute with the value "Foo", a "type" attribute with the value "pod", and a "section" attribute with the value "Bar". =item $token->attr(I<attrname>, I<newvalue>) This sets the I<attrname> attribute for this start-token object to I<newvalue>. You probably won't need to do this. =item $token->attr_hash This returns the hashref that is the attribute set for this start-token. This is useful if (for example) you want to ask what all the attributes are -- you can just do C<< keys %{$token->attr_hash} >> =back You're unlikely to ever need to construct an object of this class for yourself, but if you want to, call C<< Pod::Simple::PullParserStartToken->new( I<tagname>, I<attrhash> ) >> =head1 SEE ALSO L<Pod::Simple::PullParserToken>, L<Pod::Simple>, L<Pod::Simple::Subclassing> =head1 SEE ALSO L<Pod::Simple::PullParserToken>, L<Pod::Simple>, L<Pod::Simple::Subclassing> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item * Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\��N�� Search.pmnu��[��require 5.005; package Pod::Simple::Search; use strict; use vars qw($VERSION $MAX_VERSION_WITHIN $SLEEPY); $VERSION = '3.42'; ## Current version of this package BEGIN { DEBUG = sub () {0} unless defined &DEBUG; } # set DEBUG level use Carp (); $SLEEPY = 1 if !defined $SLEEPY and $^O =~ /mswin\|mac/i; # flag to occasionally sleep for $SLEEPY - 1 seconds. $MAX_VERSION_WITHIN \|\|= 60; ############################################################################# #use diagnostics; use File::Spec (); use File::Basename qw( basename dirname ); use Config (); use Cwd qw( cwd ); #========================================================================== __PACKAGE__->_accessorize( # Make my dumb accessor methods 'callback', 'progress', 'dir_prefix', 'inc', 'laborious', 'limit_glob', 'limit_re', 'shadows', 'verbose', 'name2path', 'path2name', 'recurse', 'ciseen', 'is_case_insensitive' ); #========================================================================== sub new { my $class = shift; my $self = bless {}, ref($class) \|\| $class; $self->init; return $self; } sub init { my $self = shift; $self->inc(1); $self->recurse(1); $self->verbose(DEBUG); $self->is_case_insensitive(-e uc __FILE__ && -e lc __FILE__); return $self; } #-------------------------------------------------------------------------- sub survey { my($self, @search_dirs) = @_; $self = $self->new unless ref $self; # tolerate being a class method $self->_expand_inc( \@search_dirs ); $self->{'_scan_count'} = 0; $self->{'_dirs_visited'} = {}; $self->path2name( {} ); $self->name2path( {} ); $self->ciseen( {} ); $self->limit_re( $self->_limit_glob_to_limit_re ) if $self->{'limit_glob'}; my $cwd = cwd(); my $verbose = $self->verbose; local $_; # don't clobber the caller's $_ ! foreach my $try (@search_dirs) { unless( File::Spec->file_name_is_absolute($try) ) { # make path absolute $try = File::Spec->catfile( $cwd ,$try); } # simplify path $try = File::Spec->canonpath($try); my $start_in; my $modname_prefix; if($self->{'dir_prefix'}) { $start_in = File::Spec->catdir( $try, grep length($_), split '[\\/:]+', $self->{'dir_prefix'} ); $modname_prefix = [grep length($_), split m{[:/\\]}, $self->{'dir_prefix'}]; $verbose and print "Appending \"$self->{'dir_prefix'}\" to $try, ", "giving $start_in (= @$modname_prefix)\n"; } else { $start_in = $try; } if( $self->{'_dirs_visited'}{$start_in} ) { $verbose and print "Directory '$start_in' already seen, skipping.\n"; next; } else { $self->{'_dirs_visited'}{$start_in} = 1; } unless(-e $start_in) { $verbose and print "Skipping non-existent $start_in\n"; next; } my $closure = $self->_make_search_callback; if(-d $start_in) { # Normal case: $verbose and print "Beginning excursion under $start_in\n"; $self->_recurse_dir( $start_in, $closure, $modname_prefix ); $verbose and print "Back from excursion under $start_in\n\n"; } elsif(-f _) { # A excursion consisting of just one file! $_ = basename($start_in); $verbose and print "Pondering $start_in ($_)\n"; $closure->($start_in, $_, 0, []); } else { $verbose and print "Skipping mysterious $start_in\n"; } } $self->progress and $self->progress->done( "Noted $$self{'_scan_count'} Pod files total"); $self->ciseen( {} ); return unless defined wantarray; # void return $self->name2path unless wantarray; # scalar return $self->name2path, $self->path2name; # list } #========================================================================== sub _make_search_callback { my $self = $_[0]; # Put the options in variables, for easy access my( $laborious, $verbose, $shadows, $limit_re, $callback, $progress, $path2name, $name2path, $recurse, $ciseen, $is_case_insensitive) = map scalar($self->$_()), qw(laborious verbose shadows limit_re callback progress path2name name2path recurse ciseen is_case_insensitive); my ($seen, $remember, $files_for); if ($is_case_insensitive) { $seen = sub { $ciseen->{ lc $_[0] } }; $remember = sub { $name2path->{ $_[0] } = $ciseen->{ lc $_[0] } = $_[1]; }; $files_for = sub { my $n = lc $_[0]; grep { lc $path2name->{$_} eq $n } %{ $path2name } }; } else { $seen = sub { $name2path->{ $_[0] } }; $remember = sub { $name2path->{ $_[0] } = $_[1] }; $files_for = sub { my $n = $_[0]; grep { $path2name->{$_} eq $n } %{ $path2name } }; } my($file, $shortname, $isdir, $modname_bits); return sub { ($file, $shortname, $isdir, $modname_bits) = @_; if($isdir) { # this never gets called on the startdir itself, just subdirs unless( $recurse ) { $verbose and print "Not recursing into '$file' as per requested.\n"; return 'PRUNE'; } if( $self->{'_dirs_visited'}{$file} ) { $verbose and print "Directory '$file' already seen, skipping.\n"; return 'PRUNE'; } print "Looking in dir $file\n" if $verbose; unless ($laborious) { # $laborious overrides pruning if( m/^(\d+\.[\d_]{3,})\z/s and do { my $x = $1; $x =~ tr/_//d; $x != $] } ) { $verbose and print "Perl $] version mismatch on $_, skipping.\n"; return 'PRUNE'; } if( m/^([A-Za-z][a-zA-Z0-9_])\z/s ) { $verbose and print "$_ is a well-named module subdir. Looking....\n"; } else { $verbose and print "$_ is a fishy directory name. Skipping.\n"; return 'PRUNE'; } } # end unless $laborious $self->{'_dirs_visited'}{$file} = 1; return; # (not pruning); } # Make sure it's a file even worth even considering if($laborious) { unless( m/\.(pod\|pm\|plx?)\z/i \|\| -x _ and -T _ # Note that the cheapest operation (the RE) is run first. ) { $verbose > 1 and print " Brushing off uninteresting $file\n"; return; } } else { unless( m/^[-_a-zA-Z0-9]+\.(?:pod\|pm\|plx?)\z/is ) { $verbose > 1 and print " Brushing off oddly-named $file\n"; return; } } $verbose and print "Considering item $file\n"; my $name = $self->_path2modname( $file, $shortname, $modname_bits ); $verbose > 0.01 and print " Nominating $file as $name\n"; if($limit_re and $name !~ m/$limit_re/i) { $verbose and print "Shunning $name as not matching $limit_re\n"; return; } if( !$shadows and $seen->($name) ) { $verbose and print "Not worth considering $file ", "-- already saw $name as ", join(' ', $files_for->($name)), "\n"; return; } # Put off until as late as possible the expense of # actually reading the file: $progress and $progress->reach($self->{'_scan_count'}, "Scanning $file"); return unless $self->contains_pod( $file ); ++ $self->{'_scan_count'}; # Or finally take note of it: if ( my $prev = $seen->($name) ) { $verbose and print "Duplicate POD found (shadowing?): $name ($file)\n", " Already seen in ", join(' ', $files_for->($name)), "\n"; } else { $remember->($name, $file); # Noting just the first occurrence } $verbose and print " Noting $name = $file\n"; if( $callback ) { local $_ = $_; # insulate from changes, just in case $callback->($file, $name); } $path2name->{$file} = $name; return; } } #========================================================================== sub _path2modname { my($self, $file, $shortname, $modname_bits) = @_; # this code simplifies the POD name for Perl modules: # * remove "site_perl" # * remove e.g. "i586-linux" (from 'archname') # * remove e.g. 5.00503 # * remove pod/ if followed by perl.pod (e.g. in pod/perlfunc.pod) # dig into the file for case-preserved name if not already mixed case my @m = @$modname_bits; my $x; my $verbose = $self->verbose; # Shaving off leading naughty-bits while(@m and defined($x = lc( $m[0] )) and( $x eq 'site_perl' or($x =~ m/^pods?$/ and @m == 1 and $shortname =~ m{^perl.\.pod$}s ) or $x =~ m{\\d+\\.z\\d+([_.]?\\d+)?} # if looks like a vernum or $x eq lc( $Config::Config{'archname'} ) )) { shift @m } my $name = join '::', @m, $shortname; $self->_simplify_base($name); # On VMS, case-preserved document names can't be constructed from # filenames, so try to extract them from the "=head1 NAME" tag in the # file instead. if ($^O eq 'VMS' && ($name eq lc($name) \|\| $name eq uc($name))) { open PODFILE, "<$file" or die "_path2modname: Can't open $file: $!"; my $in_pod = 0; my $in_name = 0; my $line; while ($line = <PODFILE>) { chomp $line; $in_pod = 1 if ($line =~ m/^=\w/); $in_pod = 0 if ($line =~ m/^=cut/); next unless $in_pod; # skip non-pod text next if ($line =~ m/^\s\z/); # and blank lines next if ($in_pod && ($line =~ m/^X</)); # and commands if ($in_name) { if ($line =~ m/(\w+::)?(\w+)/) { # substitute case-preserved version of name my $podname = $2; my $prefix = $1 \|\| ''; $verbose and print "Attempting case restore of '$name' from '$prefix$podname'\n"; unless ($name =~ s/$prefix$podname/$prefix$podname/i) { $verbose and print "Attempting case restore of '$name' from '$podname'\n"; $name =~ s/$podname/$podname/i; } last; } } $in_name = 1 if ($line =~ m/^=head1 NAME/); } close PODFILE; } return $name; } #========================================================================== sub _recurse_dir { my($self, $startdir, $callback, $modname_bits) = @_; my $maxdepth = $self->{'fs_recursion_maxdepth'} \|\| 10; my $verbose = $self->verbose; my $here_string = File::Spec->curdir; my $up_string = File::Spec->updir; $modname_bits \|\|= []; my $recursor; $recursor = sub { my($dir_long, $dir_bare) = @_; if( @$modname_bits >= 10 ) { $verbose and print "Too deep! [@$modname_bits]\n"; return; } unless(-d $dir_long) { $verbose > 2 and print "But it's not a dir! $dir_long\n"; return; } unless( opendir(INDIR, $dir_long) ) { $verbose > 2 and print "Can't opendir $dir_long : $!\n"; closedir(INDIR); return } # Load all items; put no extension before .pod before .pm before .plx?. my @items = map { $_->[0] } sort { $a->[1] cmp $b->[1] \|\| $b->[2] cmp $a->[2] } map { (my $t = $_) =~ s/[.]p(m\|lx?\|od)\z//; [$_, $t, lc($1 \|\| 'z') ] } readdir(INDIR); closedir(INDIR); push @$modname_bits, $dir_bare unless $dir_bare eq ''; my $i_full; foreach my $i (@items) { next if $i eq $here_string or $i eq $up_string or $i eq ''; $i_full = File::Spec->catfile( $dir_long, $i ); if(!-r $i_full) { $verbose and print "Skipping unreadable $i_full\n"; } elsif(-f $i_full) { $_ = $i; $callback->( $i_full, $i, 0, $modname_bits ); } elsif(-d _) { $i =~ s/\.DIR\z//i if $^O eq 'VMS'; $_ = $i; my $rv = $callback->( $i_full, $i, 1, $modname_bits ) \|\| ''; if($rv eq 'PRUNE') { $verbose > 1 and print "OK, pruning"; } else { # Otherwise, recurse into it $recursor->( File::Spec->catdir($dir_long, $i) , $i); } } else { $verbose > 1 and print "Skipping oddity $i_full\n"; } } pop @$modname_bits; return; };; local $_; $recursor->($startdir, ''); undef $recursor; # allow it to be GC'd return; } #========================================================================== sub run { # A function, useful in one-liners my $self = __PACKAGE__->new; $self->limit_glob($ARGV[0]) if @ARGV; $self->callback( sub { my($file, $name) = @_; my $version = ''; # Yes, I know we won't catch the version in like a File/Thing.pm # if we see File/Thing.pod first. That's just the way the # cookie crumbles. -- SMB if($file =~ m/\.pod$/i) { # Don't bother looking for $VERSION in .pod files DEBUG and print "Not looking for \$VERSION in .pod $file\n"; } elsif( !open(INPOD, $file) ) { DEBUG and print "Couldn't open $file: $!\n"; close(INPOD); } else { # Sane case: file is readable my $lines = 0; while(<INPOD>) { last if $lines++ > $MAX_VERSION_WITHIN; # some degree of sanity if( s/^\s\$VERSION\s=\s//s and m/\d/ ) { DEBUG and print "Found version line (#$lines): $_"; s/\s\#.//s; s/\;\s$//s; s/\s+$//s; s/\t+/ /s; # nix tabs # Optimize the most common cases: $_ = "v$1" if m{^v?["']?([0-9_]+(\.[0-9_]+))["']?$}s # like in $VERSION = "3.14159"; or m{\$Revision:\s([0-9_]+(?:\.[0-9_]+))\s\$}s # like in sprintf("%d.%02d", q$Revision: 4.13 $ =~ /(\d+)\.(\d+)/); ; # Like in sprintf("%d.%s", map {s/_//g; $_} q$Name: release-0_55-public $ =~ /-(\d+)_([\d_]+)/) $_ = sprintf("v%d.%s", map {s/_//g; $_} $1 =~ m/-(\d+)_([\d_]+)/) # snare just the numeric part if m{\$Name:\s([^\$]+)\$}s ; $version = $_; DEBUG and print "Noting $version as version\n"; last; } } close(INPOD); } print "$name\t$version\t$file\n"; return; # End of callback! }); $self->survey; } #========================================================================== sub simplify_name { my($self, $str) = @_; # Remove all path components # XXX Why not just use basename()? -- SMB if ($^O eq 'MacOS') { $str =~ s{^.:+}{}s } else { $str =~ s{^./+}{}s } $self->_simplify_base($str); return $str; } #========================================================================== sub _simplify_base { # Internal method only # strip Perl's own extensions $_[1] =~ s/\.(pod\|pm\|plx?)\z//i; # strip meaningless extensions on Win32 and OS/2 $_[1] =~ s/\.(bat\|exe\|cmd)\z//i if $^O =~ /mswin\|os2/i; # strip meaningless extensions on VMS $_[1] =~ s/\.(com)\z//i if $^O eq 'VMS'; return; } #========================================================================== sub _expand_inc { my($self, $search_dirs) = @_; return unless $self->{'inc'}; my %seen = map { File::Spec->rel2abs($_) => 1 } @{ $search_dirs }; if ($^O eq 'MacOS') { push @$search_dirs, grep { !$seen{ File::Spec->rel2abs($_) }++ } $self->_mac_whammy(@INC); # Any other OSs need custom handling here? } else { push @$search_dirs, grep { !$seen{ File::Spec->rel2abs($_) }++ } @INC; } $self->{'laborious'} = 0; # Since inc said to use INC return; } #========================================================================== sub _mac_whammy { # Tolerate '.', './some_dir' and '(../)+some_dir' on Mac OS my @them; (undef,@them) = @_; for $_ (@them) { if ( $_ eq '.' ) { $_ = ':'; } elsif ( $_ =~ s\|^((?:\.\./)+)\|':' x (length($1)/3)\|e ) { $_ = ':'. $_; } else { $_ =~ s\|^\./\|:\|; } } return @them; } #========================================================================== sub _limit_glob_to_limit_re { my $self = $_[0]; my $limit_glob = $self->{'limit_glob'} \|\| return; my $limit_re = '^' . quotemeta($limit_glob) . '$'; $limit_re =~ s/\\\?/./g; # glob "?" => "." $limit_re =~ s/\\\/.?/g; # glob "" => ".?" $limit_re =~ s/\.\\?\$$//s; # final glob "" => ".?$" => "" $self->{'verbose'} and print "Turning limit_glob $limit_glob into re $limit_re\n"; # A common optimization: if(!exists($self->{'dir_prefix'}) and $limit_glob =~ m/^(?:\w+\:\:)+/s # like "File::" or "File::Thing" # Optimize for sane and common cases (but not things like "::File") ) { $self->{'dir_prefix'} = join "::", $limit_glob =~ m/^(?:\w+::)+/sg; $self->{'verbose'} and print " and setting dir_prefix to $self->{'dir_prefix'}\n"; } return $limit_re; } #========================================================================== # contribution mostly from Tim Jenness <t.jenness@jach.hawaii.edu> sub _actual_filenames { my $dir = shift; my $fn = lc shift; opendir my ($dh), $dir or return; return map { File::Spec->catdir($dir, $_) } grep { lc $_ eq $fn } readdir $dh; } sub find { my($self, $pod, @search_dirs) = @_; $self = $self->new unless ref $self; # tolerate being a class method # Check usage Carp::carp 'Usage: \$self->find($podname, ...)' unless defined $pod and length $pod; my $verbose = $self->verbose; # Split on :: and then join the name together using File::Spec my @parts = split /::/, $pod; $verbose and print "Chomping {$pod} => {@parts}\n"; #@search_dirs = File::Spec->curdir unless @search_dirs; $self->_expand_inc(\@search_dirs); # Add location of binaries such as pod2text: push @search_dirs, $Config::Config{'scriptdir'} if $self->inc; my %seen_dir; while (my $dir = shift @search_dirs ) { next unless defined $dir and length $dir; next if $seen_dir{$dir}; $seen_dir{$dir} = 1; unless(-d $dir) { print "Directory $dir does not exist\n" if $verbose; } print "Looking in directory $dir\n" if $verbose; my $fullname = File::Spec->catfile( $dir, @parts ); print "Filename is now $fullname\n" if $verbose; foreach my $ext ('', '.pod', '.pm', '.pl') { # possible extensions my $fullext = $fullname . $ext; if ( -f $fullext and $self->contains_pod($fullext) ) { print "FOUND: $fullext\n" if $verbose; if (@parts > 1 && lc $parts[0] eq 'pod' && $self->is_case_insensitive() && $ext eq '.pod') { # Well, this file could be for a program (perldoc) but we actually # want a module (Pod::Perldoc). So see if there is a .pm with the # proper casing. my $subdir = dirname $fullext; unless (grep { $fullext eq $_ } _actual_filenames $subdir, "$parts[-1].pod") { print "# Looking for alternate spelling in $subdir\n" if $verbose; # Try the .pm file. my $pm = $fullname . '.pm'; if ( -f $pm and $self->contains_pod($pm) ) { # Prefer the .pm if its case matches. if (grep { $pm eq $_ } _actual_filenames $subdir, "$parts[-1].pm") { print "FOUND: $fullext\n" if $verbose; return $pm; } } } } return $fullext; } } # Case-insensitively Look for ./pod directories and slip them in. for my $subdir ( _actual_filenames($dir, 'pods'), _actual_filenames($dir, 'pod') ) { if (-d $subdir) { $verbose and print "Noticing $subdir and looking there...\n"; unshift @search_dirs, $subdir; } } } return undef; } #========================================================================== sub contains_pod { my($self, $file) = @_; my $verbose = $self->{'verbose'}; # check for one line of POD $verbose > 1 and print " Scanning $file for pod...\n"; unless( open(MAYBEPOD,"<$file") ) { print "Error: $file is unreadable: $!\n"; return undef; } sleep($SLEEPY - 1) if $SLEEPY; # avoid totally hogging the processor on OSs with poor process control local $_; while( <MAYBEPOD> ) { if(m/^=(head\d\|pod\|over\|item)\b/s) { close(MAYBEPOD) \|\| die "Bizarre error closing $file: $!\nAborting"; chomp; $verbose > 1 and print " Found some pod ($_) in $file\n"; return 1; } } close(MAYBEPOD) \|\| die "Bizarre error closing $file: $!\nAborting"; $verbose > 1 and print " No POD in $file, skipping.\n"; return 0; } #========================================================================== sub _accessorize { # A simple-minded method-maker shift; no strict 'refs'; foreach my $attrname (@_) { {caller() . '::' . $attrname} = sub { use strict; $Carp::CarpLevel = 1, Carp::croak( "Accessor usage: \$obj->$attrname() or \$obj->$attrname(\$new_value)" ) unless (@_ == 1 or @_ == 2) and ref $_[0]; # Read access: return $_[0]->{$attrname} if @_ == 1; # Write access: $_[0]->{$attrname} = $_[1]; return $_[0]; # RETURNS MYSELF! }; } # Ya know, they say accessories make the ensemble! return; } #========================================================================== sub _state_as_string { my $self = $_[0]; return '' unless ref $self; my @out = "{\n # State of $self ...\n"; foreach my $k (sort keys %$self) { push @out, " ", _esc($k), " => ", _esc($self->{$k}), ",\n"; } push @out, "}\n"; my $x = join '', @out; $x =~ s/^/#/mg; return $x; } sub _esc { my $in = $_[0]; return 'undef' unless defined $in; $in =~ s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> <'\\x'.(unpack("H2",$1))>eg; return qq{"$in"}; } #========================================================================== run() unless caller; # run if "perl whatever/Search.pm" 1; #========================================================================== __END__ =head1 NAME Pod::Simple::Search - find POD documents in directory trees =head1 SYNOPSIS use Pod::Simple::Search; my $name2path = Pod::Simple::Search->new->limit_glob('LWP::')->survey; print "Looky see what I found: ", join(' ', sort keys %$name2path), "\n"; print "LWPUA docs = ", Pod::Simple::Search->new->find('LWP::UserAgent') \|\| "?", "\n"; =head1 DESCRIPTION B<Pod::Simple::Search> is a class that you use for running searches for Pod files. An object of this class has several attributes (mostly options for controlling search options), and some methods for searching based on those attributes. The way to use this class is to make a new object of this class, set any options, and then call one of the search options (probably C<survey> or C<find>). The sections below discuss the syntaxes for doing all that. =head1 CONSTRUCTOR This class provides the one constructor, called C<new>. It takes no parameters: use Pod::Simple::Search; my $search = Pod::Simple::Search->new; =head1 ACCESSORS This class defines several methods for setting (and, occasionally, reading) the contents of an object. With two exceptions (discussed at the end of this section), these attributes are just for controlling the way searches are carried out. Note that each of these return C<$self> when you call them as C<< $self->I<whatever(value)> >>. That's so that you can chain together set-attribute calls like this: my $name2path = Pod::Simple::Search->new -> inc(0) -> verbose(1) -> callback(\&blab) ->survey(@there); ...which works exactly as if you'd done this: my $search = Pod::Simple::Search->new; $search->inc(0); $search->verbose(1); $search->callback(\&blab); my $name2path = $search->survey(@there); =over =item $search->inc( I<true-or-false> ); This attribute, if set to a true value, means that searches should implicitly add perl's I<@INC> paths. This automatically considers paths specified in the C<PERL5LIB> environment as this is prepended to I<@INC> by the Perl interpreter itself. This attribute's default value is B<TRUE>. If you want to search only specific directories, set $self->inc(0) before calling $inc->survey or $inc->find. =item $search->verbose( I<nonnegative-number> ); This attribute, if set to a nonzero positive value, will make searches output (via C<warn>) notes about what they're doing as they do it. This option may be useful for debugging a pod-related module. This attribute's default value is zero, meaning that no C<warn> messages are produced. (Setting verbose to 1 turns on some messages, and setting it to 2 turns on even more messages, i.e., makes the following search(es) even more verbose than 1 would make them.) =item $search->limit_glob( I<some-glob-string> ); This option means that you want to limit the results just to items whose podnames match the given glob/wildcard expression. For example, you might limit your search to just "LWP::", to search only for modules starting with "LWP::" (but not including the module "LWP" itself); or you might limit your search to "LW" to see only modules whose (full) names begin with "LW"; or you might search for "Find" to search for all modules with "Find" somewhere in their full name. (You can also use "?" in a glob expression; so "DB?" will match "DBI" and "DBD".) =item $search->callback( I<\&some_routine> ); This attribute means that every time this search sees a matching Pod file, it should call this callback routine. The routine is called with two parameters: the current file's filespec, and its pod name. (For example: C<("/etc/perljunk/File/Crunk.pm", "File::Crunk")> would be in C<@_>.) The callback routine's return value is not used for anything. This attribute's default value is false, meaning that no callback is called. =item $search->laborious( I<true-or-false> ); Unless you set this attribute to a true value, Pod::Search will apply Perl-specific heuristics to find the correct module PODs quickly. This attribute's default value is false. You won't normally need to set this to true. Specifically: Turning on this option will disable the heuristics for seeing only files with Perl-like extensions, omitting subdirectories that are numeric but do I<not> match the current Perl interpreter's version ID, suppressing F<site_perl> as a module hierarchy name, etc. =item $search->recurse( I<true-or-false> ); Unless you set this attribute to a false value, Pod::Search will recurse into subdirectories of the search directories. =item $search->shadows( I<true-or-false> ); Unless you set this attribute to a true value, Pod::Simple::Search will consider only the first file of a given modulename as it looks thru the specified directories; that is, with this option off, if Pod::Simple::Search has seen a C<somepathdir/Foo/Bar.pm> already in this search, then it won't bother looking at a C<somelaterpathdir/Foo/Bar.pm> later on in that search, because that file is merely a "shadow". But if you turn on C<< $self->shadows(1) >>, then these "shadow" files are inspected too, and are noted in the pathname2podname return hash. This attribute's default value is false; and normally you won't need to turn it on. =item $search->is_case_insensitive( I<true-or-false> ); Pod::Simple::Search will by default internally make an assumption based on the underlying filesystem where the class file is found whether it is case insensitive or not. If it is determined to be case insensitive, during survey() it may skip pod files/modules that happen to be equal to names it's already seen, ignoring case. However, it's possible to have distinct files in different directories that intentionally has the same name, just differing in case, that should be reported. Hence, you may force the behavior by setting this to true or false. =item $search->limit_re( I<some-regxp> ); Setting this attribute (to a value that's a regexp) means that you want to limit the results just to items whose podnames match the given regexp. Normally this option is not needed, and the more efficient C<limit_glob> attribute is used instead. =item $search->dir_prefix( I<some-string-value> ); Setting this attribute to a string value means that the searches should begin in the specified subdirectory name (like "Pod" or "File::Find", also expressible as "File/Find"). For example, the search option C<< $search->limit_glob("File::Find::R") >> is the same as the combination of the search options C<< $search->limit_re("^File::Find::R") -> dir_prefix("File::Find") >>. Normally you don't need to know about the C<dir_prefix> option, but I include it in case it might prove useful for someone somewhere. (Implementationally, searching with limit_glob ends up setting limit_re and usually dir_prefix.) =item $search->progress( I<some-progress-object> ); If you set a value for this attribute, the value is expected to be an object (probably of a class that you define) that has a C<reach> method and a C<done> method. This is meant for reporting progress during the search, if you don't want to use a simple callback. Normally you don't need to know about the C<progress> option, but I include it in case it might prove useful for someone somewhere. While a search is in progress, the progress object's C<reach> and C<done> methods are called like this: # Every time a file is being scanned for pod: $progress->reach($count, "Scanning $file"); ++$count; # And then at the end of the search: $progress->done("Noted $count Pod files total"); Internally, we often set this to an object of class Pod::Simple::Progress. That class is probably undocumented, but you may wish to look at its source. =item $name2path = $self->name2path; This attribute is not a search parameter, but is used to report the result of C<survey> method, as discussed in the next section. =item $path2name = $self->path2name; This attribute is not a search parameter, but is used to report the result of C<survey> method, as discussed in the next section. =back =head1 MAIN SEARCH METHODS Once you've actually set any options you want (if any), you can go ahead and use the following methods to search for Pod files in particular ways. =head2 C<< $search->survey( @directories ) >> The method C<survey> searches for POD documents in a given set of files and/or directories. This runs the search according to the various options set by the accessors above. (For example, if the C<inc> attribute is on, as it is by default, then the perl @INC directories are implicitly added to the list of directories (if any) that you specify.) The return value of C<survey> is two hashes: =over =item C<name2path> A hash that maps from each pod-name to the filespec (like "Stuff::Thing" => "/whatever/plib/Stuff/Thing.pm") =item C<path2name> A hash that maps from each Pod filespec to its pod-name (like "/whatever/plib/Stuff/Thing.pm" => "Stuff::Thing") =back Besides saving these hashes as the hashref attributes C<name2path> and C<path2name>, calling this function also returns these hashrefs. In list context, the return value of C<< $search->survey >> is the list C<(\%name2path, \%path2name)>. In scalar context, the return value is C<\%name2path>. Or you can just call this in void context. Regardless of calling context, calling C<survey> saves its results in its C<name2path> and C<path2name> attributes. E.g., when searching in F<$HOME/perl5lib>, the file F<$HOME/perl5lib/MyModule.pm> would get the POD name I<MyModule>, whereas F<$HOME/perl5lib/Myclass/Subclass.pm> would be I<Myclass::Subclass>. The name information can be used for POD translators. Only text files containing at least one valid POD command are found. In verbose mode, a warning is printed if shadows are found (i.e., more than one POD file with the same POD name is found, e.g. F<CPAN.pm> in different directories). This usually indicates duplicate occurrences of modules in the I<@INC> search path, which is occasionally inadvertent (but is often simply a case of a user's path dir having a more recent version than the system's general path dirs in general.) The options to this argument is a list of either directories that are searched recursively, or files. (Usually you wouldn't specify files, but just dirs.) Or you can just specify an empty-list, as in $name2path; with the C<inc> option on, as it is by default. The POD names of files are the plain basenames with any Perl-like extension (.pm, .pl, .pod) stripped, and path separators replaced by C<::>'s. Calling Pod::Simple::Search->search(...) is short for Pod::Simple::Search->new->search(...). That is, a throwaway object with default attribute values is used. =head2 C<< $search->simplify_name( $str ) >> The method B<simplify_name> is equivalent to B<basename>, but also strips Perl-like extensions (.pm, .pl, .pod) and extensions like F<.bat>, F<.cmd> on Win32 and OS/2, or F<.com> on VMS, respectively. =head2 C<< $search->find( $pod ) >> =head2 C<< $search->find( $pod, @search_dirs ) >> Returns the location of a Pod file, given a Pod/module/script name (like "Foo::Bar" or "perlvar" or "perldoc"), and an idea of what files/directories to look in. It searches according to the various options set by the accessors above. (For example, if the C<inc> attribute is on, as it is by default, then the perl @INC directories are implicitly added to the list of directories (if any) that you specify.) This returns the full path of the first occurrence to the file. Package names (eg 'A::B') are automatically converted to directory names in the selected directory. Additionally, '.pm', '.pl' and '.pod' are automatically appended to the search as required. (So, for example, under Unix, "A::B" is converted to "somedir/A/B.pm", "somedir/A/B.pod", or "somedir/A/B.pl", as appropriate.) If no such Pod file is found, this method returns undef. If any of the given search directories contains a F<pod/> subdirectory, then it is searched. (That's how we manage to find F<perlfunc>, for example, which is usually in F<pod/perlfunc> in most Perl dists.) The C<verbose> and C<inc> attributes influence the behavior of this search; notably, C<inc>, if true, adds @INC I<and also $Config::Config{'scriptdir'}> to the list of directories to search. It is common to simply say C<< $filename = Pod::Simple::Search-> new ->find("perlvar") >> so that just the @INC (well, and scriptdir) directories are searched. (This happens because the C<inc> attribute is true by default.) Calling Pod::Simple::Search->find(...) is short for Pod::Simple::Search->new->find(...). That is, a throwaway object with default attribute values is used. =head2 C<< $self->contains_pod( $file ) >> Returns true if the supplied filename (not POD module) contains some Pod documentation. =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org> with code borrowed from Marek Rouchal's L<Pod::Find>, which in turn heavily borrowed code from Nick Ing-Simmons' C<PodToHtml>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\�m�ȅ �� TranscodeDumb.pmnu��[�� require 5; ## This module is to be use()'d only by Pod::Simple::Transcode package Pod::Simple::TranscodeDumb; use strict; use vars qw($VERSION %Supported); $VERSION = '3.42'; # This module basically pretends it knows how to transcode, except # only for null-transcodings! We use this when Encode isn't # available. %Supported = ( 'ascii' => 1, 'ascii-ctrl' => 1, 'iso-8859-1' => 1, 'cp1252' => 1, 'null' => 1, 'latin1' => 1, 'latin-1' => 1, %Supported, ); sub is_dumb {1} sub is_smart {0} sub all_encodings { return sort keys %Supported; } sub encoding_is_available { return exists $Supported{lc $_[1]}; } sub encmodver { return __PACKAGE__ . " v" .($VERSION \|\| '?'); } sub make_transcoder { my ($e) = $_[1]; die "WHAT ENCODING!?!?" unless $e; # No-op for all but CP1252. return sub {;} if $e !~ /^cp-?1252$/i; # Replace CP1252 nerbles with their ASCII equivalents. return sub { # Copied from Encode::ZapCP1252. my %ascii_for = ( # http://en.wikipedia.org/wiki/Windows-1252 "\x80" => 'e', # EURO SIGN "\x82" => ',', # SINGLE LOW-9 QUOTATION MARK "\x83" => 'f', # LATIN SMALL LETTER F WITH HOOK "\x84" => ',,', # DOUBLE LOW-9 QUOTATION MARK "\x85" => '...', # HORIZONTAL ELLIPSIS "\x86" => '+', # DAGGER "\x87" => '++', # DOUBLE DAGGER "\x88" => '^', # MODIFIER LETTER CIRCUMFLEX ACCENT "\x89" => '%', # PER MILLE SIGN "\x8a" => 'S', # LATIN CAPITAL LETTER S WITH CARON "\x8b" => '<', # SINGLE LEFT-POINTING ANGLE QUOTATION MARK "\x8c" => 'OE', # LATIN CAPITAL LIGATURE OE "\x8e" => 'Z', # LATIN CAPITAL LETTER Z WITH CARON "\x91" => "'", # LEFT SINGLE QUOTATION MARK "\x92" => "'", # RIGHT SINGLE QUOTATION MARK "\x93" => '"', # LEFT DOUBLE QUOTATION MARK "\x94" => '"', # RIGHT DOUBLE QUOTATION MARK "\x95" => '', # BULLET "\x96" => '-', # EN DASH "\x97" => '--', # EM DASH "\x98" => '~', # SMALL TILDE "\x99" => '(tm)', # TRADE MARK SIGN "\x9a" => 's', # LATIN SMALL LETTER S WITH CARON "\x9b" => '>', # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK "\x9c" => 'oe', # LATIN SMALL LIGATURE OE "\x9e" => 'z', # LATIN SMALL LETTER Z WITH CARON "\x9f" => 'Y', # LATIN CAPITAL LETTER Y WITH DIAERESIS ); s{([\x80-\x9f])}{$ascii_for{$1} \|\| $1}emxsg for @_; }; } 1; PK��]"\5PJ�\��\��RTF.pmnu��[�� require 5; package Pod::Simple::RTF; #sub DEBUG () {4}; #sub Pod::Simple::DEBUG () {4}; #sub Pod::Simple::PullParser::DEBUG () {4}; use strict; use vars qw($VERSION @ISA %Escape $WRAP %Tagmap); $VERSION = '3.42'; use Pod::Simple::PullParser (); BEGIN {@ISA = ('Pod::Simple::PullParser')} use Carp (); BEGIN { DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG } sub to_uni ($) { # Convert native code point to Unicode my $x = shift; # Broken for early EBCDICs $x = chr utf8::native_to_unicode(ord $x) if $] ge 5.007_003 && ord("A") != 65; return $x; } # We escape out 'F' so that we can send RTF files thru the mail without the # slightest worry that paragraphs beginning with "From" will get munged. # We also escape '\', '{', '}', and '_' my $map_to_self = ' !"#$%&\'()+,-./0123456789:;<=>?@ABCDEGHIJKLMNOPQRSTUVWXYZ[]^`abcdefghijklmnopqrstuvwxyz\|~'; $WRAP = 1 unless defined $WRAP; %Escape = ( # Start with every character mapping to its hex equivalent map( (chr($_) => sprintf("\\'%02x", $_)), 0 .. 0xFF), # Override most ASCII printables with themselves (or on non-ASCII platforms, # their ASCII values. This is because the output is UTF-16, which is always # based on Unicode code points) map( ( substr($map_to_self, $_, 1) => to_uni(substr($map_to_self, $_, 1))), 0 .. length($map_to_self) - 1), # And some refinements: "\r" => "\n", "\cj" => "\n", "\n" => "\n\\line ", "\t" => "\\tab ", # Tabs (altho theoretically raw \t's are okay) "\f" => "\n\\page\n", # Formfeed "-" => "\\_", # Turn plaintext '-' into a non-breaking hyphen $Pod::Simple::nbsp => "\\~", # Latin-1 non-breaking space $Pod::Simple::shy => "\\-", # Latin-1 soft (optional) hyphen # CRAZY HACKS: "\n" => "\\line\n", "\r" => "\n", "\cb" => "{\n\\cs21\\lang1024\\noproof ", # \\cf1 "\cc" => "}", ); # Generate a string of all the characters in %Escape that don't map to # themselves. First, one without the hyphen, then one with. my $escaped_sans_hyphen = ""; $escaped_sans_hyphen .= $_ for grep { $_ ne $Escape{$_} && $_ ne '-' } sort keys %Escape; my $escaped = "-$escaped_sans_hyphen"; # Then convert to patterns $escaped_sans_hyphen = qr/[\Q$escaped_sans_hyphen \E]/; $escaped= qr/[\Q$escaped\E]/; #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub _openclose { return map {; m/^([-A-Za-z]+)=(\w[^\=])$/s or die "what's <$_>?"; ( $1, "{\\$2\n", "/$1", "}" ); } @_; } my @_to_accept; %Tagmap = ( # 'foo=bar' means ('foo' => '{\bar'."\n", '/foo' => '}') _openclose( 'B=cs18\b', 'I=cs16\i', 'C=cs19\f1\lang1024\noproof', 'F=cs17\i\lang1024\noproof', 'VerbatimI=cs26\i', 'VerbatimB=cs27\b', 'VerbatimBI=cs28\b\i', map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ } qw[ underline=ul smallcaps=scaps shadow=shad superscript=super subscript=sub strikethrough=strike outline=outl emboss=embo engrave=impr dotted-underline=uld dash-underline=uldash dot-dash-underline=uldashd dot-dot-dash-underline=uldashdd double-underline=uldb thick-underline=ulth word-underline=ulw wave-underline=ulwave ] # But no double-strikethrough, because MSWord can't agree with the # RTF spec on whether it's supposed to be \strikedl or \striked1 (!!!) ), # Bit of a hack here: 'L=pod' => '{\cs22\i'."\n", 'L=url' => '{\cs23\i'."\n", 'L=man' => '{\cs24\i'."\n", '/L' => '}', 'Data' => "\n", '/Data' => "\n", 'Verbatim' => "\n{\\pard\\li#rtfindent##rtfkeep#\\plain\\s20\\sa180\\f1\\fs18\\lang1024\\noproof\n", '/Verbatim' => "\n\\par}\n", 'VerbatimFormatted' => "\n{\\pard\\li#rtfindent##rtfkeep#\\plain\\s20\\sa180\\f1\\fs18\\lang1024\\noproof\n", '/VerbatimFormatted' => "\n\\par}\n", 'Para' => "\n{\\pard\\li#rtfindent#\\sa180\n", '/Para' => "\n\\par}\n", 'head1' => "\n{\\pard\\li#rtfindent#\\s31\\keepn\\sb90\\sa180\\f2\\fs#head1_halfpoint_size#\\ul{\n", '/head1' => "\n}\\par}\n", 'head2' => "\n{\\pard\\li#rtfindent#\\s32\\keepn\\sb90\\sa180\\f2\\fs#head2_halfpoint_size#\\ul{\n", '/head2' => "\n}\\par}\n", 'head3' => "\n{\\pard\\li#rtfindent#\\s33\\keepn\\sb90\\sa180\\f2\\fs#head3_halfpoint_size#\\ul{\n", '/head3' => "\n}\\par}\n", 'head4' => "\n{\\pard\\li#rtfindent#\\s34\\keepn\\sb90\\sa180\\f2\\fs#head4_halfpoint_size#\\ul{\n", '/head4' => "\n}\\par}\n", # wordpad borks on \tc\tcl1, or I'd put that in =head1 and =head2 'item-bullet' => "\n{\\pard\\li#rtfindent##rtfitemkeepn#\\sb60\\sa150\\fi-120\n", '/item-bullet' => "\n\\par}\n", 'item-number' => "\n{\\pard\\li#rtfindent##rtfitemkeepn#\\sb60\\sa150\\fi-120\n", '/item-number' => "\n\\par}\n", 'item-text' => "\n{\\pard\\li#rtfindent##rtfitemkeepn#\\sb60\\sa150\\fi-120\n", '/item-text' => "\n\\par}\n", # we don't need any styles for over-* and /over-* ); #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub new { my $new = shift->SUPER::new(@_); $new->nix_X_codes(1); $new->nbsp_for_S(1); $new->accept_targets( 'rtf', 'RTF' ); $new->{'Tagmap'} = {%Tagmap}; $new->accept_codes(@_to_accept); $new->accept_codes('VerbatimFormatted'); DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n"; $new->doc_lang( ( $ENV{'RTFDEFLANG'} \|\| '') =~ m/^(\d{1,10})$/s ? $1 : ($ENV{'RTFDEFLANG'} \|\| '') =~ m/^0?x([a-fA-F0-9]{1,10})$/s ? hex($1) # yes, tolerate hex! : ($ENV{'RTFDEFLANG'} \|\| '') =~ m/^([a-fA-F0-9]{4})$/s ? hex($1) # yes, tolerate even more hex! : '1033' ); $new->head1_halfpoint_size(32); $new->head2_halfpoint_size(28); $new->head3_halfpoint_size(25); $new->head4_halfpoint_size(22); $new->codeblock_halfpoint_size(18); $new->header_halfpoint_size(17); $new->normal_halfpoint_size(25); return $new; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ __PACKAGE__->_accessorize( 'doc_lang', 'head1_halfpoint_size', 'head2_halfpoint_size', 'head3_halfpoint_size', 'head4_halfpoint_size', 'codeblock_halfpoint_size', 'header_halfpoint_size', 'normal_halfpoint_size', 'no_proofing_exemptions', ); #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub run { my $self = $_[0]; return $self->do_middle if $self->bare_output; return $self->do_beginning && $self->do_middle && $self->do_end; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Match something like an identifier. Prefer XID if available, then plain ID, # then just ASCII my $id_re = Pod::Simple::BlackBox::my_qr('[\'_\p{XIDS}][\'\p{XIDC}]+', "ab"); $id_re = Pod::Simple::BlackBox::my_qr('[\'_\p{IDS}][\'\p{IDC}]+', "ab") unless $id_re; $id_re = qr/['_a-zA-Z]['a-zA-Z0-9_]+/ unless $id_re; sub do_middle { # the main work my $self = $_[0]; my $fh = $self->{'output_fh'}; my($token, $type, $tagname, $scratch); my @stack; my @indent_stack; $self->{'rtfindent'} = 0 unless defined $self->{'rtfindent'}; while($token = $self->get_token) { if( ($type = $token->type) eq 'text' ) { if( $self->{'rtfverbatim'} ) { DEBUG > 1 and print STDERR " $type " , $token->text, " in verbatim!\n"; rtf_esc(0, $scratch = $token->text); # 0 => Don't escape hyphen print $fh $scratch; next; } DEBUG > 1 and print STDERR " $type " , $token->text, "\n"; $scratch = $token->text; $scratch =~ tr/\t\cb\cc/ /d; $self->{'no_proofing_exemptions'} or $scratch =~ s/(?: ^ \| (?<=[\r\n\t "\[\<\(]) ) # start on whitespace, sequence-start, or quote ( # something looking like a Perl token: (?: [\$\@\:\<\\\_]\S+ # either starting with a sigil, etc. ) \| # or starting alpha, but containing anything strange: (?: ${id_re}[\$\@\:_<>\(\\\]\S+ ) ) /\cb$1\cc/xsg ; rtf_esc(1, $scratch); # 1 => escape hyphen $scratch =~ s/( [^\r\n]{65} # Snare 65 characters from a line [^\r\n ]{0,50} # and finish any current word ) (\ {1,10})(?![\r\n]) # capture some spaces not at line-end /$1$2\n/gx # and put a NL before those spaces if $WRAP; # This may wrap at well past the 65th column, but not past the 120th. print $fh $scratch; } elsif( $type eq 'start' ) { DEBUG > 1 and print STDERR " +$type ",$token->tagname, " (", map("<$_> ", %{$token->attr_hash}), ")\n"; if( ($tagname = $token->tagname) eq 'Verbatim' or $tagname eq 'VerbatimFormatted' ) { ++$self->{'rtfverbatim'}; my $next = $self->get_token; next unless defined $next; my $line_count = 1; if($next->type eq 'text') { my $t = $next->text_r; while( $$t =~ m/$/mg ) { last if ++$line_count > 15; # no point in counting further } DEBUG > 3 and print STDERR " verbatim line count: $line_count\n"; } $self->unget_token($next); $self->{'rtfkeep'} = ($line_count > 15) ? '' : '\keepn' ; } elsif( $tagname =~ m/^item-/s ) { my @to_unget; my $text_count_here = 0; $self->{'rtfitemkeepn'} = ''; # Some heuristics to stop item-'s functioning as subheadings # from getting split from the things they're subheadings for. # # It's not terribly pretty, but it really does make things pretty. # while(1) { push @to_unget, $self->get_token; pop(@to_unget), last unless defined $to_unget[-1]; # Erroneously used to be "unshift" instead of pop! Adds instead # of removes, and operates on the beginning instead of the end! if($to_unget[-1]->type eq 'text') { if( ($text_count_here += length ${$to_unget[-1]->text_r}) > 150 ){ DEBUG > 1 and print STDERR " item- is too long to be keepn'd.\n"; last; } } elsif (@to_unget > 1 and $to_unget[-2]->type eq 'end' and $to_unget[-2]->tagname =~ m/^item-/s ) { # Bail out here, after setting rtfitemkeepn yea or nay. $self->{'rtfitemkeepn'} = '\keepn' if $to_unget[-1]->type eq 'start' and $to_unget[-1]->tagname eq 'Para'; DEBUG > 1 and printf STDERR " item-* before %s(%s) %s keepn'd.\n", $to_unget[-1]->type, $to_unget[-1]->can('tagname') ? $to_unget[-1]->tagname : '', $self->{'rtfitemkeepn'} ? "gets" : "doesn't get"; last; } elsif (@to_unget > 40) { DEBUG > 1 and print STDERR " item-* now has too many tokens (", scalar(@to_unget), (DEBUG > 4) ? (q<: >, map($_->dump, @to_unget)) : (), ") to be keepn'd.\n"; last; # give up } # else keep while'ing along } # Now put it aaaaall back... $self->unget_token(@to_unget); } elsif( $tagname =~ m/^over-/s ) { push @stack, $1; push @indent_stack, int($token->attr('indent') * 4 * $self->normal_halfpoint_size); DEBUG and print STDERR "Indenting over $indent_stack[-1] twips.\n"; $self->{'rtfindent'} += $indent_stack[-1]; } elsif ($tagname eq 'L') { $tagname .= '=' . ($token->attr('type') \|\| 'pod'); } elsif ($tagname eq 'Data') { my $next = $self->get_token; next unless defined $next; unless( $next->type eq 'text' ) { $self->unget_token($next); next; } DEBUG and print STDERR " raw text ", $next->text, "\n"; printf $fh "\n" . $next->text . "\n"; next; } defined($scratch = $self->{'Tagmap'}{$tagname}) or next; $scratch =~ s/\#([^\#]+)\#/${$self}{$1}/g; # interpolate print $fh $scratch; if ($tagname eq 'item-number') { print $fh $token->attr('number'), ". \n"; } elsif ($tagname eq 'item-bullet') { print $fh "\\'", ord("_"), "\n"; #for funky testing: print $fh '', rtf_esc(1, "\x{4E4B}\x{9053}"); } } elsif( $type eq 'end' ) { DEBUG > 1 and print STDERR " -$type ",$token->tagname,"\n"; if( ($tagname = $token->tagname) =~ m/^over-/s ) { DEBUG and print STDERR "Indenting back $indent_stack[-1] twips.\n"; $self->{'rtfindent'} -= pop @indent_stack; pop @stack; } elsif( $tagname eq 'Verbatim' or $tagname eq 'VerbatimFormatted') { --$self->{'rtfverbatim'}; } defined($scratch = $self->{'Tagmap'}{"/$tagname"}) or next; $scratch =~ s/\#([^\#]+)\#/${$self}{$1}/g; # interpolate print $fh $scratch; } } return 1; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub do_beginning { my $self = $_[0]; my $fh = $self->{'output_fh'}; return print $fh join '', $self->doc_init, $self->font_table, $self->stylesheet, $self->color_table, $self->doc_info, $self->doc_start, "\n" ; } sub do_end { my $self = $_[0]; my $fh = $self->{'output_fh'}; return print $fh '}'; # that should do it } ########################################################################### sub stylesheet { return sprintf <<'END', {\stylesheet {\snext0 Normal;} {\\cs10 \additive Default Paragraph Font;} {\\cs16 \additive \i \sbasedon10 pod-I;} {\\cs17 \additive \i\lang1024\noproof \sbasedon10 pod-F;} {\\cs18 \additive \b \sbasedon10 pod-B;} {\\cs19 \additive \f1\lang1024\noproof\sbasedon10 pod-C;} {\s20\ql \li0\ri0\sa180\widctlpar\f1\fs%s\lang1024\noproof\sbasedon0 \snext0 pod-codeblock;} {\\cs21 \additive \lang1024\noproof \sbasedon10 pod-computerese;} {\\cs22 \additive \i\lang1024\noproof\sbasedon10 pod-L-pod;} {\\cs23 \additive \i\lang1024\noproof\sbasedon10 pod-L-url;} {\\cs24 \additive \i\lang1024\noproof\sbasedon10 pod-L-man;} {\\cs25 \additive \f1\lang1024\noproof\sbasedon0 pod-codelbock-plain;} {\\cs26 \additive \f1\lang1024\noproof\sbasedon25 pod-codelbock-ital;} {\\cs27 \additive \f1\lang1024\noproof\sbasedon25 pod-codelbock-bold;} {\\cs28 \additive \f1\lang1024\noproof\sbasedon25 pod-codelbock-bold-ital;} {\s31\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 pod-head1;} {\s32\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 pod-head2;} {\s33\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 pod-head3;} {\s34\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 pod-head4;} } END $_[0]->codeblock_halfpoint_size(), $_[0]->head1_halfpoint_size(), $_[0]->head2_halfpoint_size(), $_[0]->head3_halfpoint_size(), $_[0]->head4_halfpoint_size(), ; } ########################################################################### # Override these as necessary for further customization sub font_table { return <<'END'; # text font, code font, heading font {\fonttbl {\f0\froman Times New Roman;} {\f1\fmodern Courier New;} {\f2\fswiss Arial;} } END } sub doc_init { return <<'END'; {\rtf1\ansi\deff0 END } sub color_table { return <<'END'; {\colortbl;\red255\green0\blue0;\red0\green0\blue255;} END } sub doc_info { my $self = $_[0]; my $class = ref($self) \|\| $self; my $tag = __PACKAGE__ . ' ' . $VERSION; unless($class eq __PACKAGE__) { $tag = " ($tag)"; $tag = " v" . $self->VERSION . $tag if defined $self->VERSION; $tag = $class . $tag; } return sprintf <<'END', {\info{\doccomm %s using %s v%s under Perl v%s at %s GMT} {\author [see doc]}{\company [see doc]}{\operator [see doc]} } END # None of the following things should need escaping, I dare say! $tag, $ISA[0], $ISA[0]->VERSION(), $], scalar(gmtime($ENV{SOURCE_DATE_EPOCH} \|\| time)), ; } sub doc_start { my $self = $_[0]; my $title = $self->get_short_title(); DEBUG and print STDERR "Short Title: <$title>\n"; $title .= ' ' if length $title; $title =~ s/ $/ /s; $title =~ s/^ //s; $title =~ s/ $/, /s; # make sure it ends in a comma and a space, unless it's 0-length my $is_obviously_module_name; $is_obviously_module_name = 1 if $title =~ m/^\S+$/s and $title =~ m/::/s; # catches the most common case, at least DEBUG and print STDERR "Title0: <$title>\n"; $title = rtf_esc(1, $title); # 1 => escape hyphen DEBUG and print STDERR "Title1: <$title>\n"; $title = '\lang1024\noproof ' . $title if $is_obviously_module_name; return sprintf <<'END', \deflang%s\plain\lang%s\widowctrl {\header\pard\qr\plain\f2\fs%s %s p.\chpgn\par} \fs%s END ($self->doc_lang) x 2, $self->header_halfpoint_size, $title, $self->normal_halfpoint_size, ; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #------------------------------------------------------------------------- use integer; my $question_mark_code_points = Pod::Simple::BlackBox::my_qr('([^\x00-\x{D7FF}\x{E000}-\x{10FFFF}])', "\x{110000}"); my $plane0 = Pod::Simple::BlackBox::my_qr('([\x{100}-\x{FFFF}])', "\x{100}"); my $other_unicode = Pod::Simple::BlackBox::my_qr('([\x{10000}-\x{10FFFF}])', "\x{10000}"); sub esc_uni($) { use if $] le 5.006002, 'utf8'; my $x = shift; # The output is expected to be UTF-16. Surrogates and above-Unicode get # mapped to '?' $x =~ s/$question_mark_code_points/?/g if $question_mark_code_points; # Non-surrogate Plane 0 characters get mapped to their code points. But # the standard calls for a 16bit SIGNED value. $x =~ s/$plane0/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg if $plane0; # Use surrogate pairs for the rest $x =~ s/$other_unicode/'\\uc1\\u' . ((ord($1) >> 10) + 0xD7C0 - 65536) . '\\u' . (((ord$1) & 0x03FF) + 0xDC00 - 65536) . '?'/eg if $other_unicode; return $x; } sub rtf_esc ($$) { # The parameter is true if we should escape hyphens my $escape_re = ((shift) ? $escaped : $escaped_sans_hyphen); # When false, it doesn't change "-" to hard-hyphen. # We don't want to change the "-" to hard-hyphen, because we want to # be able to paste this into a file and run it without there being # dire screaming about the mysterious hard-hyphen character (which # looks just like a normal dash character). # XXX The comments used to claim that when false it didn't apply computerese # style-smarts, but khw didn't see this actually my $x; # scratch if(!defined wantarray) { # void context: alter in-place! for(@_) { s/($escape_re)/$Escape{$1}/g; # ESCAPER $_ = esc_uni($_); } return; } elsif(wantarray) { # return an array return map {; ($x = $_) =~ s/($escape_re)/$Escape{$1}/g; # ESCAPER $x = esc_uni($x); $x; } @_; } else { # return a single scalar ($x = ((@_ == 1) ? $_[0] : join '', @_) ) =~ s/($escape_re)/$Escape{$1}/g; # ESCAPER # Escape \, {, }, -, control chars, and 7f-ff. $x = esc_uni($x); return $x; } } 1; __END__ =head1 NAME Pod::Simple::RTF -- format Pod as RTF =head1 SYNOPSIS perl -MPod::Simple::RTF -e \ "exit Pod::Simple::RTF->filter(shift)->any_errata_seen" \ thingy.pod > thingy.rtf =head1 DESCRIPTION This class is a formatter that takes Pod and renders it as RTF, good for viewing/printing in MSWord, WordPad/write.exe, TextEdit, etc. This is a subclass of L<Pod::Simple> and inherits all its methods. =head1 FORMAT CONTROL ATTRIBUTES You can set these attributes on the parser object before you call C<parse_file> (or a similar method) on it: =over =item $parser->head1_halfpoint_size( I<halfpoint_integer> ); =item $parser->head2_halfpoint_size( I<halfpoint_integer> ); =item $parser->head3_halfpoint_size( I<halfpoint_integer> ); =item $parser->head4_halfpoint_size( I<halfpoint_integer> ); These methods set the size (in half-points, like 52 for 26-point) that these heading levels will appear as. =item $parser->codeblock_halfpoint_size( I<halfpoint_integer> ); This method sets the size (in half-points, like 21 for 10.5-point) that codeblocks ("verbatim sections") will appear as. =item $parser->header_halfpoint_size( I<halfpoint_integer> ); This method sets the size (in half-points, like 15 for 7.5-point) that the header on each page will appear in. The header is usually just "I<modulename> p. I<pagenumber>". =item $parser->normal_halfpoint_size( I<halfpoint_integer> ); This method sets the size (in half-points, like 26 for 13-point) that normal paragraphic text will appear in. =item $parser->no_proofing_exemptions( I<true_or_false> ); Set this value to true if you don't want the formatter to try putting a hidden code on all Perl symbols (as best as it can notice them) that labels them as being not in English, and so not worth spellchecking. =item $parser->doc_lang( I<microsoft_decimal_language_code> ) This sets the language code to tag this document as being in. By default, it is currently the value of the environment variable C<RTFDEFLANG>, or if that's not set, then the value 1033 (for US English). Setting this appropriately is useful if you want to use the RTF to spellcheck, and/or if you want it to hyphenate right. Here are some notable values: 1033 US English 2057 UK English 3081 Australia English 4105 Canada English 1034 Spain Spanish 2058 Mexico Spanish 1031 Germany German 1036 France French 3084 Canada French 1035 Finnish 1044 Norwegian (Bokmal) 2068 Norwegian (Nynorsk) =back If you are particularly interested in customizing this module's output even more, see the source and/or write to me. =head1 SEE ALSO L<Pod::Simple>, L<RTF::Writer>, L<RTF::Cookbook>, L<RTF::Document>, L<RTF::Generator> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item * Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\b\��# ��# ��PullParserTextToken.pmnu��[�� require 5; package Pod::Simple::PullParserTextToken; use Pod::Simple::PullParserToken (); use strict; use vars qw(@ISA $VERSION); @ISA = ('Pod::Simple::PullParserToken'); $VERSION = '3.42'; sub new { # Class->new(text); my $class = shift; return bless ['text', @_], ref($class) \|\| $class; } # Purely accessors: sub text { (@_ == 2) ? ($_[0][1] = $_[1]) : $_[0][1] } sub text_r { \ $_[0][1] } 1; __END__ =head1 NAME Pod::Simple::PullParserTextToken -- text-tokens from Pod::Simple::PullParser =head1 SYNOPSIS (See L<Pod::Simple::PullParser>) =head1 DESCRIPTION When you do $parser->get_token on a L<Pod::Simple::PullParser>, you might get an object of this class. This is a subclass of L<Pod::Simple::PullParserToken> and inherits all its methods, and adds these methods: =over =item $token->text This returns the text that this token holds. For example, parsing CZ<><foo> will return a C start-token, a text-token, and a C end-token. And if you want to get the "foo" out of the text-token, call C<< $token->text >> =item $token->text(I<somestring>) This changes the string that this token holds. You probably won't need to do this. =item $token->text_r() This returns a scalar reference to the string that this token holds. This can be useful if you don't want to memory-copy the potentially large text value (well, as large as a paragraph or a verbatim block) as calling $token->text would do. Or, if you want to alter the value, you can even do things like this: for ( ${ $token->text_r } ) { # Aliases it with $_ !! s/ The / the /g; # just for example if( 'A' eq chr(65) ) { # (if in an ASCII world) tr/\xA0/ /; tr/\xAD//d; } ...or however you want to alter the value... (Note that starting with Perl v5.8, you can use, e.g., my $nbsp = chr utf8::unicode_to_native(0xA0); s/$nbsp/ /g; to handle the above regardless if it's an ASCII world or not) } =back You're unlikely to ever need to construct an object of this class for yourself, but if you want to, call C<< Pod::Simple::PullParserTextToken->new( I<text> ) >> =head1 SEE ALSO L<Pod::Simple::PullParserToken>, L<Pod::Simple>, L<Pod::Simple::Subclassing> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item * Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\��h+��Debug.pmnu��[��require 5; package Pod::Simple::Debug; use strict; use vars qw($VERSION ); $VERSION = '3.42'; sub import { my($value,$variable); if(@_ == 2) { $value = $_[1]; } elsif(@_ == 3) { ($variable, $value) = @_[1,2]; ($variable, $value) = ($value, $variable) if defined $value and ref($value) eq 'SCALAR' and not(defined $variable and ref($variable) eq 'SCALAR') ; # tolerate getting it backwards unless( defined $variable and ref($variable) eq 'SCALAR') { require Carp; Carp::croak("Usage:\n use Pod::Simple::Debug (NUMVAL)\nor" . "\n use Pod::Simple::Debug (\\\$var, STARTNUMVAL)\nAborting"); } } else { require Carp; Carp::croak("Usage:\n use Pod::Simple::Debug (NUMVAL)\nor" . "\n use Pod::Simple::Debug (\\\$var, STARTNUMVAL)\nAborting"); } if( defined &Pod::Simple::DEBUG ) { require Carp; Carp::croak("It's too late to call Pod::Simple::Debug -- " . "Pod::Simple has already loaded\nAborting"); } $value = 0 unless defined $value; unless($value =~ m/^-?\d+$/) { require Carp; Carp::croak( "$value isn't a numeric value." . "\nUsage:\n use Pod::Simple::Debug (NUMVAL)\nor" . "\n use Pod::Simple::Debug (\\\$var, STARTNUMVAL)\nAborting"); } if( defined $variable ) { # make a not-really-constant Pod::Simple::DEBUG = sub () { $$variable } ; $$variable = $value; print STDERR "# Starting Pod::Simple::DEBUG = non-constant $variable with val $value\n"; } else { Pod::Simple::DEBUG = eval " sub () { $value } "; print STDERR "# Starting Pod::Simple::DEBUG = $value\n"; } require Pod::Simple; return; } 1; __END__ =head1 NAME Pod::Simple::Debug -- put Pod::Simple into trace/debug mode =head1 SYNOPSIS use Pod::Simple::Debug (5); # or some integer Or: my $debuglevel; use Pod::Simple::Debug (\$debuglevel, 0); ...some stuff that uses Pod::Simple to do stuff, but which you don't want debug output from... $debug_level = 4; ...some stuff that uses Pod::Simple to do stuff, but which you DO want debug output from... $debug_level = 0; =head1 DESCRIPTION This is an internal module for controlling the debug level (a.k.a. trace level) of Pod::Simple. This is of interest only to Pod::Simple developers. =head1 CAVEATS Note that you should load this module I<before> loading Pod::Simple (or any Pod::Simple-based class). If you try loading Pod::Simple::Debug after &Pod::Simple::DEBUG is already defined, Pod::Simple::Debug will throw a fatal error to the effect that "It's too late to call Pod::Simple::Debug". Note that the C<use Pod::Simple::Debug (\$x, I<somenum>)> mode will make Pod::Simple (et al) run rather slower, since &Pod::Simple::DEBUG won't be a constant sub anymore, and so Pod::Simple (et al) won't compile with constant-folding. =head1 GUTS Doing this: use Pod::Simple::Debug (5); # or some integer is basically equivalent to: BEGIN { sub Pod::Simple::DEBUG () {5} } # or some integer use Pod::Simple (); And this: use Pod::Simple::Debug (\$debug_level,0); # or some integer is basically equivalent to this: my $debug_level; BEGIN { $debug_level = 0 } BEGIN { sub Pod::Simple::DEBUG () { $debug_level } use Pod::Simple (); =head1 SEE ALSO L<Pod::Simple> The article "Constants in Perl", in I<The Perl Journal> issue 21. See L<http://interglacial.com/tpj/21/> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item * Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\��О��О��HTMLBatch.pmnu��[�� require 5; package Pod::Simple::HTMLBatch; use strict; use vars qw( $VERSION $HTML_RENDER_CLASS $HTML_EXTENSION $CSS $JAVASCRIPT $SLEEPY $SEARCH_CLASS @ISA ); $VERSION = '3.42'; @ISA = (); # Yup, we're NOT a subclass of Pod::Simple::HTML! # TODO: nocontents stylesheets. Strike some of the color variations? use Pod::Simple::HTML (); BEGIN {esc = \&Pod::Simple::HTML::esc } use File::Spec (); use Pod::Simple::Search; $SEARCH_CLASS \|\|= 'Pod::Simple::Search'; BEGIN { if(defined &DEBUG) { } # no-op elsif( defined &Pod::Simple::DEBUG ) { DEBUG = \&Pod::Simple::DEBUG } else { DEBUG = sub () {0}; } } $SLEEPY = 1 if !defined $SLEEPY and $^O =~ /mswin\|mac/i; # flag to occasionally sleep for $SLEEPY - 1 seconds. $HTML_RENDER_CLASS \|\|= "Pod::Simple::HTML"; # # Methods beginning with "_" are particularly internal and possibly ugly. # Pod::Simple::_accessorize( __PACKAGE__, 'verbose', # how verbose to be during batch conversion 'html_render_class', # what class to use to render 'search_class', # what to use to search for POD documents 'contents_file', # If set, should be the name of a file (in current directory) # to write the list of all modules to 'index', # will set $htmlpage->index(...) to this (true or false) 'progress', # progress object 'contents_page_start', 'contents_page_end', 'css_flurry', '_css_wad', 'javascript_flurry', '_javascript_wad', 'no_contents_links', # set to true to suppress automatic adding of << links. '_contents', ); # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Just so we can run from the command line more easily sub go { @ARGV == 2 or die sprintf( "Usage: perl -M%s -e %s:go indirs outdir\n (or use \"\@INC\" for indirs)\n", __PACKAGE__, __PACKAGE__, ); if(defined($ARGV[1]) and length($ARGV[1])) { my $d = $ARGV[1]; -e $d or die "I see no output directory named \"$d\"\nAborting"; -d $d or die "But \"$d\" isn't a directory!\nAborting"; -w $d or die "Directory \"$d\" isn't writeable!\nAborting"; } __PACKAGE__->batch_convert(@ARGV); } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub new { my $new = bless {}, ref($_[0]) \|\| $_[0]; $new->html_render_class($HTML_RENDER_CLASS); $new->search_class($SEARCH_CLASS); $new->verbose(1 + DEBUG); $new->_contents([]); $new->index(1); $new-> _css_wad([]); $new->css_flurry(1); $new->_javascript_wad([]); $new->javascript_flurry(1); $new->contents_file( 'index' . ($HTML_EXTENSION \|\| $Pod::Simple::HTML::HTML_EXTENSION) ); $new->contents_page_start( join "\n", grep $_, $Pod::Simple::HTML::Doctype_decl, "<html><head>", "<title>Perl Documentation</title>", $Pod::Simple::HTML::Content_decl, "</head>", "\n<body class='contentspage'>\n<h1>Perl Documentation</h1>\n" ); # override if you need a different title $new->contents_page_end( sprintf( "\n\n<p class='contentsfooty'>Generated by %s v%s under Perl v%s\n<br >At %s GMT.</p>\n\n</body></html>\n", esc( ref($new), eval {$new->VERSION} \|\| $VERSION, $], scalar(gmtime($ENV{SOURCE_DATE_EPOCH} \|\| time)), ))); return $new; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub muse { my $self = shift; if($self->verbose) { print 'T+', int(time() - $self->{'_batch_start_time'}), "s: ", @_, "\n"; } return 1; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub batch_convert { my($self, $dirs, $outdir) = @_; $self \|\|= __PACKAGE__; # tolerate being called as an optionless function $self = $self->new unless ref $self; # tolerate being used as a class method if(!defined($dirs) or $dirs eq '' or $dirs eq '@INC' ) { $dirs = ''; } elsif(ref $dirs) { # OK, it's an explicit set of dirs to scan, specified as an arrayref. } else { # OK, it's an explicit set of dirs to scan, specified as a # string like "/thing:/also:/whatever/perl" (":"-delim, as usual) # or, under MSWin, like "c:/thing;d:/also;c:/whatever/perl" (";"-delim!) require Config; my $ps = quotemeta( $Config::Config{'path_sep'} \|\| ":" ); $dirs = [ grep length($_), split qr/$ps/, $dirs ]; } $outdir = $self->filespecsys->curdir unless defined $outdir and length $outdir; $self->_batch_convert_main($dirs, $outdir); } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub _batch_convert_main { my($self, $dirs, $outdir) = @_; # $dirs is either false, or an arrayref. # $outdir is a pathspec. $self->{'_batch_start_time'} \|\|= time(); $self->muse( "= ", scalar(localtime) ); $self->muse( "Starting batch conversion to \"$outdir\"" ); my $progress = $self->progress; if(!$progress and $self->verbose > 0 and $self->verbose() <= 5) { require Pod::Simple::Progress; $progress = Pod::Simple::Progress->new( ($self->verbose < 2) ? () # Default omission-delay : ($self->verbose == 2) ? 1 # Reduce the omission-delay : 0 # Eliminate the omission-delay ); $self->progress($progress); } if($dirs) { $self->muse(scalar(@$dirs), " dirs to scan: @$dirs"); } else { $self->muse("Scanning \@INC. This could take a minute or two."); } my $mod2path = $self->find_all_pods($dirs ? $dirs : ()); $self->muse("Done scanning."); my $total = keys %$mod2path; unless($total) { $self->muse("No pod found. Aborting batch conversion.\n"); return $self; } $progress and $progress->goal($total); $self->muse("Now converting pod files to HTML.", ($total > 25) ? " This will take a while more." : () ); $self->_spray_css( $outdir ); $self->_spray_javascript( $outdir ); $self->_do_all_batch_conversions($mod2path, $outdir); $progress and $progress->done(sprintf ( "Done converting %d files.", $self->{"__batch_conv_page_count"} )); return $self->_batch_convert_finish($outdir); return $self; } sub _do_all_batch_conversions { my($self, $mod2path, $outdir) = @_; $self->{"__batch_conv_page_count"} = 0; foreach my $module (sort {lc($a) cmp lc($b)} keys %$mod2path) { $self->_do_one_batch_conversion($module, $mod2path, $outdir); sleep($SLEEPY - 1) if $SLEEPY; } return; } sub _batch_convert_finish { my($self, $outdir) = @_; $self->write_contents_file($outdir); $self->muse("Done with batch conversion. $$self{'__batch_conv_page_count'} files done."); $self->muse( "= ", scalar(localtime) ); $self->progress and $self->progress->done("All done!"); return; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub _do_one_batch_conversion { my($self, $module, $mod2path, $outdir, $outfile) = @_; my $retval; my $total = scalar keys %$mod2path; my $infile = $mod2path->{$module}; my @namelets = grep m/\S/, split "::", $module; # this can stick around in the contents LoL my $depth = scalar @namelets; die "Contentless thingie?! $module $infile" unless @namelets; #sanity $outfile \|\|= do { my @n = @namelets; $n[-1] .= $HTML_EXTENSION \|\| $Pod::Simple::HTML::HTML_EXTENSION; $self->filespecsys->catfile( $outdir, @n ); }; my $progress = $self->progress; my $page = $self->html_render_class->new; if(DEBUG > 5) { $self->muse($self->{"__batch_conv_page_count"} + 1, "/$total: ", ref($page), " render ($depth) $module => $outfile"); } elsif(DEBUG > 2) { $self->muse($self->{"__batch_conv_page_count"} + 1, "/$total: $module => $outfile") } # Give each class a chance to init the converter: $page->batch_mode_page_object_init($self, $module, $infile, $outfile, $depth) if $page->can('batch_mode_page_object_init'); # Init for the index (TOC), too. $self->batch_mode_page_object_init($page, $module, $infile, $outfile, $depth) if $self->can('batch_mode_page_object_init'); # Now get busy... $self->makepath($outdir => \@namelets); $progress and $progress->reach($self->{"__batch_conv_page_count"}, "Rendering $module"); if( $retval = $page->parse_from_file($infile, $outfile) ) { ++ $self->{"__batch_conv_page_count"} ; $self->note_for_contents_file( \@namelets, $infile, $outfile ); } else { $self->muse("Odd, parse_from_file(\"$infile\", \"$outfile\") returned false."); } $page->batch_mode_page_object_kill($self, $module, $infile, $outfile, $depth) if $page->can('batch_mode_page_object_kill'); # The following isn't a typo. Note that it switches $self and $page. $self->batch_mode_page_object_kill($page, $module, $infile, $outfile, $depth) if $self->can('batch_mode_page_object_kill'); DEBUG > 4 and printf STDERR "%s %sb < $infile %s %sb\n", $outfile, -s $outfile, $infile, -s $infile ; undef($page); return $retval; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub filespecsys { $_[0]{'_filespecsys'} \|\| 'File::Spec' } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub note_for_contents_file { my($self, $namelets, $infile, $outfile) = @_; # I think the infile and outfile parts are never used. -- SMB # But it's handy to have them around for debugging. if( $self->contents_file ) { my $c = $self->_contents(); push @$c, [ join("::", @$namelets), $infile, $outfile, $namelets ] # 0 1 2 3 ; DEBUG > 3 and print STDERR "Noting @$c[-1]\n"; } return; } #_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- sub write_contents_file { my($self, $outdir) = @_; my $outfile = $self->_contents_filespec($outdir) \|\| return; $self->muse("Preparing list of modules for ToC"); my($toplevel, # maps toplevelbit => [all submodules] $toplevel_form_freq, # ends up being 'foo' => 'Foo' ) = $self->_prep_contents_breakdown; my $Contents = eval { $self->_wopen($outfile) }; if( $Contents ) { $self->muse( "Writing contents file $outfile" ); } else { warn "Couldn't write-open contents file $outfile: $!\nAbort writing to $outfile at all"; return; } $self->_write_contents_start( $Contents, $outfile, ); $self->_write_contents_middle( $Contents, $outfile, $toplevel, $toplevel_form_freq ); $self->_write_contents_end( $Contents, $outfile, ); return $outfile; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub _write_contents_start { my($self, $Contents, $outfile) = @_; my $starter = $self->contents_page_start \|\| ''; { my $css_wad = $self->_css_wad_to_markup(1); if( $css_wad ) { $starter =~ s{(</head>)}{\n$css_wad\n$1}i; # otherwise nevermind } my $javascript_wad = $self->_javascript_wad_to_markup(1); if( $javascript_wad ) { $starter =~ s{(</head>)}{\n$javascript_wad\n$1}i; # otherwise nevermind } } unless(print $Contents $starter, "<dl class='superindex'>\n" ) { warn "Couldn't print to $outfile: $!\nAbort writing to $outfile at all"; close($Contents); return 0; } return 1; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub _write_contents_middle { my($self, $Contents, $outfile, $toplevel2submodules, $toplevel_form_freq) = @_; foreach my $t (sort keys %$toplevel2submodules) { my @downlines = sort {$a->[-1] cmp $b->[-1]} @{ $toplevel2submodules->{$t} }; printf $Contents qq[<dt><a name="%s">%s</a></dt>\n<dd>\n], esc( $t, $toplevel_form_freq->{$t} ) ; my($path, $name); foreach my $e (@downlines) { $name = $e->[0]; $path = join( "/", '.', esc( @{$e->[3]} ) ) . ($HTML_EXTENSION \|\| $Pod::Simple::HTML::HTML_EXTENSION); print $Contents qq{ <a href="$path">}, esc($name), "</a>  \n"; } print $Contents "</dd>\n\n"; } return 1; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub _write_contents_end { my($self, $Contents, $outfile) = @_; unless( print $Contents "</dl>\n", $self->contents_page_end \|\| '', ) { warn "Couldn't write to $outfile: $!"; } close($Contents) or warn "Couldn't close $outfile: $!"; return 1; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub _prep_contents_breakdown { my($self) = @_; my $contents = $self->_contents; my %toplevel; # maps lctoplevelbit => [all submodules] my %toplevel_form_freq; # ends up being 'foo' => 'Foo' # (mapping anycase forms to most freq form) foreach my $entry (@$contents) { my $toplevel = $entry->[0] =~ m/^perl\w$/ ? 'perl_core_docs' # group all the perlwhatever docs together : $entry->[3][0] # normal case ; ++$toplevel_form_freq{ lc $toplevel }{ $toplevel }; push @{ $toplevel{ lc $toplevel } }, $entry; push @$entry, lc($entry->[0]); # add a sort-order key to the end } foreach my $toplevel (sort keys %toplevel) { my $fgroup = $toplevel_form_freq{$toplevel}; $toplevel_form_freq{$toplevel} = ( sort { $fgroup->{$b} <=> $fgroup->{$a} or $a cmp $b } keys %$fgroup # This hash is extremely unlikely to have more than 4 members, so this # sort isn't so very wasteful )[0]; } return(\%toplevel, \%toplevel_form_freq) if wantarray; return \%toplevel; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub _contents_filespec { my($self, $outdir) = @_; my $outfile = $self->contents_file; return unless $outfile; return $self->filespecsys->catfile( $outdir, $outfile ); } #_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- sub makepath { my($self, $outdir, $namelets) = @_; return unless @$namelets > 1; for my $i (0 .. ($#$namelets - 1)) { my $dir = $self->filespecsys->catdir( $outdir, @$namelets[0 .. $i] ); if(-e $dir) { die "$dir exists but not as a directory!?" unless -d $dir; next; } DEBUG > 3 and print STDERR " Making $dir\n"; mkdir $dir, 0777 or die "Can't mkdir $dir: $!\nAborting" ; } return; } #_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- sub batch_mode_page_object_init { my $self = shift; my($page, $module, $infile, $outfile, $depth) = @_; # TODO: any further options to percolate onto this new object here? $page->default_title($module); $page->index( $self->index ); $page->html_css( $self-> _css_wad_to_markup($depth) ); $page->html_javascript( $self->_javascript_wad_to_markup($depth) ); $self->add_header_backlink($page, $module, $infile, $outfile, $depth); $self->add_footer_backlink($page, $module, $infile, $outfile, $depth); return $self; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub add_header_backlink { my $self = shift; return if $self->no_contents_links; my($page, $module, $infile, $outfile, $depth) = @_; $page->html_header_after_title( join '', $page->html_header_after_title \|\| '', qq[<p class="backlinktop"><b><a name="___top" href="], $self->url_up_to_contents($depth), qq[" accesskey="1" title="All Documents"><<</a></b></p>\n], ) if $self->contents_file ; return; } sub add_footer_backlink { my $self = shift; return if $self->no_contents_links; my($page, $module, $infile, $outfile, $depth) = @_; $page->html_footer( join '', qq[<p class="backlinkbottom"><b><a name="___bottom" href="], $self->url_up_to_contents($depth), qq[" title="All Documents"><<</a></b></p>\n], $page->html_footer \|\| '', ) if $self->contents_file ; return; } sub url_up_to_contents { my($self, $depth) = @_; --$depth; return join '/', ('..') x $depth, esc($self->contents_file); } #_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- sub find_all_pods { my($self, $dirs) = @_; # You can override find_all_pods in a subclass if you want to # do extra filtering or whatnot. But for the moment, we just # pass to modnames2paths: return $self->modnames2paths($dirs); } #_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_- sub modnames2paths { # return a hashref mapping modulenames => paths my($self, $dirs) = @_; my $m2p; { my $search = $self->search_class->new; DEBUG and print STDERR "Searching via $search\n"; $search->verbose(1) if DEBUG > 10; $search->progress( $self->progress->copy->goal(0) ) if $self->progress; $search->shadows(0); # don't bother noting shadowed files $search->inc( $dirs ? 0 : 1 ); $search->survey( $dirs ? @$dirs : () ); $m2p = $search->name2path; die "What, no name2path?!" unless $m2p; } $self->muse("That's odd... no modules found!") unless keys %$m2p; if( DEBUG > 4 ) { print STDERR "Modules found (name => path):\n"; foreach my $m (sort {lc($a) cmp lc($b)} keys %$m2p) { print STDERR " $m $$m2p{$m}\n"; } print STDERR "(total ", scalar(keys %$m2p), ")\n\n"; } elsif( DEBUG ) { print STDERR "Found ", scalar(keys %$m2p), " modules.\n"; } $self->muse( "Found ", scalar(keys %$m2p), " modules." ); # return the Foo::Bar => /whatever/Foo/Bar.pod\|pm hashref return $m2p; } #=========================================================================== sub _wopen { # this is abstracted out so that the daemon class can override it my($self, $outpath) = @_; require Symbol; my $out_fh = Symbol::gensym(); DEBUG > 5 and print STDERR "Write-opening to $outpath\n"; return $out_fh if open($out_fh, "> $outpath"); require Carp; Carp::croak("Can't write-open $outpath: $!"); } #========================================================================== sub add_css { my($self, $url, $is_default, $name, $content_type, $media, $_code) = @_; return unless $url; unless($name) { # cook up a reasonable name based on the URL $name = $url; if( $name !~ m/\?/ and $name =~ m{([^/]+)$}s ) { $name = $1; $name =~ s/\.css//i; } } $media \|\|= 'all'; $content_type \|\|= 'text/css'; my $bunch = [$url, $name, $content_type, $media, $_code]; if($is_default) { unshift @{ $self->_css_wad }, $bunch } else { push @{ $self->_css_wad }, $bunch } return; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub _spray_css { my($self, $outdir) = @_; return unless $self->css_flurry(); $self->_gen_css_wad(); my $lol = $self->_css_wad; foreach my $chunk (@$lol) { my $url = $chunk->[0]; my $outfile; if( ref($chunk->[-1]) and $url =~ m{^(_[-a-z0-9_]+\.css$)} ) { $outfile = $self->filespecsys->catfile( $outdir, "$1" ); DEBUG > 5 and print STDERR "Noting $$chunk[0] as a file I'll create.\n"; } else { DEBUG > 5 and print STDERR "OK, noting $$chunk[0] as an external CSS.\n"; # Requires no further attention. next; } #$self->muse( "Writing autogenerated CSS file $outfile" ); my $Cssout = $self->_wopen($outfile); print $Cssout ${$chunk->[-1]} or warn "Couldn't print to $outfile: $!\nAbort writing to $outfile at all"; close($Cssout); DEBUG > 5 and print STDERR "Wrote $outfile\n"; } return; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub _css_wad_to_markup { my($self, $depth) = @_; my @css = @{ $self->_css_wad \|\| return '' }; return '' unless @css; my $rel = 'stylesheet'; my $out = ''; --$depth; my $uplink = $depth ? ('../' x $depth) : ''; foreach my $chunk (@css) { next unless $chunk and @$chunk; my( $url1, $url2, $title, $type, $media) = ( $self->_maybe_uplink( $chunk->[0], $uplink ), esc(grep !ref($_), @$chunk) ); $out .= qq{<link rel="$rel" title="$title" type="$type" href="$url1$url2" media="$media" >\n}; $rel = 'alternate stylesheet'; # alternates = all non-first iterations } return $out; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub _maybe_uplink { # if the given URL looks relative, return the given uplink string -- # otherwise return emptystring my($self, $url, $uplink) = @_; ($url =~ m{^\./} or $url !~ m{[/\:]} ) ? $uplink : '' # qualify it, if/as needed } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub _gen_css_wad { my $self = $_[0]; my $css_template = $self->_css_template; foreach my $variation ( # Commented out for sake of concision: # # 011n=black_with_red_on_white # 001n=black_with_yellow_on_white # 101n=black_with_green_on_white # 110=white_with_yellow_on_black # 010=white_with_green_on_black # 011=white_with_blue_on_black # 100=white_with_red_on_black '110n=blkbluw', # black_with_blue_on_white '010n=blkmagw', # black_with_magenta_on_white '100n=blkcynw', # black_with_cyan_on_white '101=whtprpk', # white_with_purple_on_black '001=whtnavk', # white_with_navy_blue_on_black '010a=grygrnk', # grey_with_green_on_black '010b=whtgrng', # white_with_green_on_grey '101an=blkgrng', # black_with_green_on_grey '101bn=grygrnw', # grey_with_green_on_white ) { my $outname = $variation; my($flipmode, @swap) = ( ($4 \|\| ''), $1,$2,$3) if $outname =~ s/^([012])([012])([[012])([a-z])=?//s; @swap = () if '010' eq join '', @swap; # 010 is a swop-no-op! my $this_css = "/ This file is autogenerated. Do not edit. $variation /\n\n" . $css_template; # Only look at three-digitty colors, for now at least. if( $flipmode =~ m/n/ ) { $this_css =~ s/(#[0-9a-fA-F]{3})\b/_color_negate($1)/eg; $this_css =~ s/\bthin\b/medium/g; } $this_css =~ s<#([0-9a-fA-F])([0-9a-fA-F])([0-9a-fA-F])\b> < join '', '#', ($1,$2,$3)[@swap] >eg if @swap; if( $flipmode =~ m/a/) { $this_css =~ s/#fff\b/#999/gi } # black -> dark grey elsif($flipmode =~ m/b/) { $this_css =~ s/#000\b/#666/gi } # white -> light grey my $name = $outname; $name =~ tr/-_/ /; $self->add_css( "_$outname.css", 0, $name, 0, 0, \$this_css); } # Now a few indexless variations: for (my ($outfile, $variation) = each %{{ blkbluw => 'black_with_blue_on_white', whtpurk => 'white_with_purple_on_black', whtgrng => 'white_with_green_on_grey', grygrnw => 'grey_with_green_on_white', }}) { my $this_css = join "\n", "/ This file is autogenerated. Do not edit. $outfile /\n", "\@import url(\"./_$variation.css\");", ".indexgroup { display: none; }", "\n", ; my $name = $outfile; $name =~ tr/-_/ /; $self->add_css( "_$outfile.css", 0, $name, 0, 0, \$this_css); } return; } sub _color_negate { my $x = lc $_[0]; $x =~ tr[0123456789abcdef] [fedcba9876543210]; return $x; } #=========================================================================== sub add_javascript { my($self, $url, $content_type, $_code) = @_; return unless $url; push @{ $self->_javascript_wad }, [ $url, $content_type \|\| 'text/javascript', $_code ]; return; } sub _spray_javascript { my($self, $outdir) = @_; return unless $self->javascript_flurry(); $self->_gen_javascript_wad(); my $lol = $self->_javascript_wad; foreach my $script (@$lol) { my $url = $script->[0]; my $outfile; if( ref($script->[-1]) and $url =~ m{^(_[-a-z0-9_]+\.js$)} ) { $outfile = $self->filespecsys->catfile( $outdir, "$1" ); DEBUG > 5 and print STDERR "Noting $$script[0] as a file I'll create.\n"; } else { DEBUG > 5 and print STDERR "OK, noting $$script[0] as an external JavaScript.\n"; next; } #$self->muse( "Writing JavaScript file $outfile" ); my $Jsout = $self->_wopen($outfile); print $Jsout ${$script->[-1]} or warn "Couldn't print to $outfile: $!\nAbort writing to $outfile at all"; close($Jsout); DEBUG > 5 and print STDERR "Wrote $outfile\n"; } return; } sub _gen_javascript_wad { my $self = $_[0]; my $js_code = $self->_javascript \|\| return; $self->add_javascript( "_podly.js", 0, \$js_code); return; } sub _javascript_wad_to_markup { my($self, $depth) = @_; my @scripts = @{ $self->_javascript_wad \|\| return '' }; return '' unless @scripts; my $out = ''; --$depth; my $uplink = $depth ? ('../' x $depth) : ''; foreach my $s (@scripts) { next unless $s and @$s; my( $url1, $url2, $type, $media) = ( $self->_maybe_uplink( $s->[0], $uplink ), esc(grep !ref($_), @$s) ); $out .= qq{<script type="$type" src="$url1$url2"></script>\n}; } return $out; } #=========================================================================== sub _css_template { return $CSS } sub _javascript { return $JAVASCRIPT } $CSS = <<'EOCSS'; / For accessibility reasons, never specify text sizes in px/pt/pc/in/cm/mm / @media all { .hide { display: none; } } @media print { .noprint, div.indexgroup, .backlinktop, .backlinkbottom { display: none } { border-color: black !important; color: black !important; background-color: transparent !important; background-image: none !important; } dl.superindex > dd { word-spacing: .6em; } } @media aural, braille, embossed { div.indexgroup { display: none; } /* Too noisy, don't you think? / dl.superindex > dt:before { content: "Group "; } dl.superindex > dt:after { content: " contains:"; } .backlinktop a:before { content: "Back to contents"; } .backlinkbottom a:before { content: "Back to contents"; } } @media aural { dl.superindex > dt { pause-before: 600ms; } } @media screen, tty, tv, projection { .noscreen { display: none; } a:link { color: #7070ff; text-decoration: underline; } a:visited { color: #e030ff; text-decoration: underline; } a:active { color: #800000; text-decoration: underline; } body.contentspage a { text-decoration: none; } a.u { color: #fff !important; text-decoration: none; } body.pod { margin: 0 5px; color: #fff; background-color: #000; } body.pod h1, body.pod h2, body.pod h3, body.pod h4, body.pod h5, body.pod h6 { font-family: Tahoma, Verdana, Helvetica, Arial, sans-serif; font-weight: normal; margin-top: 1.2em; margin-bottom: .1em; border-top: thin solid transparent; / margin-left: -5px; border-left: 2px #7070ff solid; padding-left: 3px; / } body.pod h1 { border-top-color: #0a0; } body.pod h2 { border-top-color: #080; } body.pod h3 { border-top-color: #040; } body.pod h4 { border-top-color: #010; } body.pod h5 { border-top-color: #010; } body.pod h6 { border-top-color: #010; } p.backlinktop + h1 { border-top: none; margin-top: 0em; } p.backlinktop + h2 { border-top: none; margin-top: 0em; } p.backlinktop + h3 { border-top: none; margin-top: 0em; } p.backlinktop + h4 { border-top: none; margin-top: 0em; } p.backlinktop + h5 { border-top: none; margin-top: 0em; } p.backlinktop + h6 { border-top: none; margin-top: 0em; } body.pod dt { font-size: 105%; / just a wee bit more than normal / } .indexgroup { font-size: 80%; } .backlinktop, .backlinkbottom { margin-left: -5px; margin-right: -5px; background-color: #040; border-top: thin solid #050; border-bottom: thin solid #050; } .backlinktop a, .backlinkbottom a { text-decoration: none; color: #080; background-color: #000; border: thin solid #0d0; } .backlinkbottom { margin-bottom: 0; padding-bottom: 0; } .backlinktop { margin-top: 0; padding-top: 0; } body.contentspage { color: #fff; background-color: #000; } body.contentspage h1 { color: #0d0; margin-left: 1em; margin-right: 1em; text-indent: -.9em; font-family: Tahoma, Verdana, Helvetica, Arial, sans-serif; font-weight: normal; border-top: thin solid #fff; border-bottom: thin solid #fff; text-align: center; } dl.superindex > dt { font-family: Tahoma, Verdana, Helvetica, Arial, sans-serif; font-weight: normal; font-size: 90%; margin-top: .45em; / margin-bottom: -.15em; / } dl.superindex > dd { word-spacing: .6em; / most important rule here! / } dl.superindex > a:link { text-decoration: none; color: #fff; } .contentsfooty { border-top: thin solid #999; font-size: 90%; } } / The End / EOCSS #========================================================================== $JAVASCRIPT = <<'EOJAVASCRIPT'; // From http://www.alistapart.com/articles/alternate/ function setActiveStyleSheet(title) { var i, a, main; for(i=0 ; (a = document.getElementsByTagName("link")[i]) ; i++) { if(a.getAttribute("rel").indexOf("style") != -1 && a.getAttribute("title")) { a.disabled = true; if(a.getAttribute("title") == title) a.disabled = false; } } } function getActiveStyleSheet() { var i, a; for(i=0 ; (a = document.getElementsByTagName("link")[i]) ; i++) { if( a.getAttribute("rel").indexOf("style") != -1 && a.getAttribute("title") && !a.disabled ) return a.getAttribute("title"); } return null; } function getPreferredStyleSheet() { var i, a; for(i=0 ; (a = document.getElementsByTagName("link")[i]) ; i++) { if( a.getAttribute("rel").indexOf("style") != -1 && a.getAttribute("rel").indexOf("alt") == -1 && a.getAttribute("title") ) return a.getAttribute("title"); } return null; } function createCookie(name,value,days) { if (days) { var date = new Date(); date.setTime(date.getTime()+(days2460601000)); var expires = "; expires="+date.toGMTString(); } else expires = ""; document.cookie = name+"="+value+expires+"; path=/"; } function readCookie(name) { var nameEQ = name + "="; var ca = document.cookie.split(';'); for(var i=0 ; i < ca.length ; i++) { var c = ca[i]; while (c.charAt(0)==' ') c = c.substring(1,c.length); if (c.indexOf(nameEQ) == 0) return c.substring(nameEQ.length,c.length); } return null; } window.onload = function(e) { var cookie = readCookie("style"); var title = cookie ? cookie : getPreferredStyleSheet(); setActiveStyleSheet(title); } window.onunload = function(e) { var title = getActiveStyleSheet(); createCookie("style", title, 365); } var cookie = readCookie("style"); var title = cookie ? cookie : getPreferredStyleSheet(); setActiveStyleSheet(title); // The End EOJAVASCRIPT # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1; __END__ =head1 NAME Pod::Simple::HTMLBatch - convert several Pod files to several HTML files =head1 SYNOPSIS perl -MPod::Simple::HTMLBatch -e 'Pod::Simple::HTMLBatch::go' in out =head1 DESCRIPTION This module is used for running batch-conversions of a lot of HTML documents This class is NOT a subclass of Pod::Simple::HTML (nor of bad old Pod::Html) -- although it uses Pod::Simple::HTML for doing the conversion of each document. The normal use of this class is like so: use Pod::Simple::HTMLBatch; my $batchconv = Pod::Simple::HTMLBatch->new; $batchconv->some_option( some_value ); $batchconv->some_other_option( some_other_value ); $batchconv->batch_convert( \@search_dirs, $output_dir ); =head2 FROM THE COMMAND LINE Note that this class also provides (but does not export) the function Pod::Simple::HTMLBatch::go. This is basically just a shortcut for C<< Pod::Simple::HTMLBatch->batch_convert(@ARGV) >>. It's meant to be handy for calling from the command line. However, the shortcut requires that you specify exactly two command-line arguments, C<indirs> and C<outdir>. Example: % mkdir out_html % perl -MPod::Simple::HTMLBatch -e Pod::Simple::HTMLBatch::go @INC out_html (to convert the pod from Perl's @INC files under the directory ./out_html) (Note that the command line there contains a literal atsign-I-N-C. This is handled as a special case by batch_convert, in order to save you having to enter the odd-looking "" as the first command-line parameter when you mean "just use whatever's in @INC".) Example: % mkdir ../seekrut % chmod og-rx ../seekrut % perl -MPod::Simple::HTMLBatch -e Pod::Simple::HTMLBatch::go . ../seekrut (to convert the pod under the current dir into HTML files under the directory ./seekrut) Example: % perl -MPod::Simple::HTMLBatch -e Pod::Simple::HTMLBatch::go happydocs . (to convert all pod from happydocs into the current directory) =head1 MAIN METHODS =over =item $batchconv = Pod::Simple::HTMLBatch->new; This creates a new batch converter. The method doesn't take parameters. To change the converter's attributes, use the L<"/ACCESSOR METHODS"> below. =item $batchconv->batch_convert( I<indirs>, I<outdir> ); This searches the directories given in I<indirs> and writes HTML files for each of these to a corresponding directory in I<outdir>. The directory I<outdir> must exist. =item $batchconv->batch_convert( undef , ...); =item $batchconv->batch_convert( q{@INC}, ...); These two values for I<indirs> specify that the normal Perl @INC =item $batchconv->batch_convert( \@dirs , ...); This specifies that the input directories are the items in the arrayref C<\@dirs>. =item $batchconv->batch_convert( "somedir" , ...); This specifies that the director "somedir" is the input. (This can be an absolute or relative path, it doesn't matter.) A common value you might want would be just "." for the current directory: $batchconv->batch_convert( "." , ...); =item $batchconv->batch_convert( 'somedir:someother:also' , ...); This specifies that you want the dirs "somedir", "someother", and "also" scanned, just as if you'd passed the arrayref C<[qw( somedir someother also)]>. Note that a ":"-separator is normal under Unix, but Under MSWin, you'll need C<'somedir;someother;also'> instead, since the pathsep on MSWin is ";" instead of ":". (And I<that> is because ":" often comes up in paths, like C<"c:/perl/lib">.) (Exactly what separator character should be used, is gotten from C<$Config::Config{'path_sep'}>, via the L<Config> module.) =item $batchconv->batch_convert( ... , undef ); This specifies that you want the HTML output to go into the current directory. (Note that a missing or undefined value means a different thing in the first slot than in the second. That's so that C<batch_convert()> with no arguments (or undef arguments) means "go from @INC, into the current directory.) =item $batchconv->batch_convert( ... , 'somedir' ); This specifies that you want the HTML output to go into the directory 'somedir'. (This can be an absolute or relative path, it doesn't matter.) =back Note that you can also call C<batch_convert> as a class method, like so: Pod::Simple::HTMLBatch->batch_convert( ... ); That is just short for this: Pod::Simple::HTMLBatch-> new-> batch_convert(...); That is, it runs a conversion with default options, for whatever inputdirs and output dir you specify. =head2 ACCESSOR METHODS The following are all accessor methods -- that is, they don't do anything on their own, but just alter the contents of the conversion object, which comprises the options for this particular batch conversion. We show the "put" form of the accessors below (i.e., the syntax you use for setting the accessor to a specific value). But you can also call each method with no parameters to get its current value. For example, C<< $self->contents_file() >> returns the current value of the contents_file attribute. =over =item $batchconv->verbose( I<nonnegative_integer> ); This controls how verbose to be during batch conversion, as far as notes to STDOUT (or whatever is C<select>'d) about how the conversion is going. If 0, no progress information is printed. If 1 (the default value), some progress information is printed. Higher values print more information. =item $batchconv->index( I<true-or-false> ); This controls whether or not each HTML page is liable to have a little table of contents at the top (which we call an "index" for historical reasons). This is true by default. =item $batchconv->contents_file( I<filename> ); If set, should be the name of a file (in the output directory) to write the HTML index to. The default value is "index.html". If you set this to a false value, no contents file will be written. =item $batchconv->contents_page_start( I<HTML_string> ); This specifies what string should be put at the beginning of the contents page. The default is a string more or less like this: <html> <head><title>Perl Documentation</title></head> <body class='contentspage'> <h1>Perl Documentation</h1> =item $batchconv->contents_page_end( I<HTML_string> ); This specifies what string should be put at the end of the contents page. The default is a string more or less like this: <p class='contentsfooty'>Generated by Pod::Simple::HTMLBatch v3.01 under Perl v5.008 <br >At Fri May 14 22:26:42 2004 GMT, which is Fri May 14 14:26:42 2004 local time.</p> =item $batchconv->add_css( $url ); TODO =item $batchconv->add_javascript( $url ); TODO =item $batchconv->css_flurry( I<true-or-false> ); If true (the default value), we autogenerate some CSS files in the output directory, and set our HTML files to use those. TODO: continue =item $batchconv->javascript_flurry( I<true-or-false> ); If true (the default value), we autogenerate a JavaScript in the output directory, and set our HTML files to use it. Currently, the JavaScript is used only to get the browser to remember what stylesheet it prefers. TODO: continue =item $batchconv->no_contents_links( I<true-or-false> ); TODO =item $batchconv->html_render_class( I<classname> ); This sets what class is used for rendering the files. The default is "Pod::Simple::HTML". If you set it to something else, it should probably be a subclass of Pod::Simple::HTML, and you should C<require> or C<use> that class so that's it's loaded before Pod::Simple::HTMLBatch tries loading it. =item $batchconv->search_class( I<classname> ); This sets what class is used for searching for the files. The default is "Pod::Simple::Search". If you set it to something else, it should probably be a subclass of Pod::Simple::Search, and you should C<require> or C<use> that class so that's it's loaded before Pod::Simple::HTMLBatch tries loading it. =back =head1 NOTES ON CUSTOMIZATION TODO call add_css($someurl) to add stylesheet as alternate call add_css($someurl,1) to add as primary stylesheet call add_javascript subclass Pod::Simple::HTML and set $batchconv->html_render_class to that classname and maybe override $page->batch_mode_page_object_init($self, $module, $infile, $outfile, $depth) or maybe override $batchconv->batch_mode_page_object_init($page, $module, $infile, $outfile, $depth) subclass Pod::Simple::Search and set $batchconv->search_class to that classname =head1 SEE ALSO L<Pod::Simple>, L<Pod::Simple::HTMLBatch>, L<perlpod>, L<perlpodspec> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\� ,�� Methody.pmnu��[�� require 5; package Pod::Simple::Methody; use strict; use Pod::Simple (); use vars qw(@ISA $VERSION); $VERSION = '3.42'; @ISA = ('Pod::Simple'); # Yes, we could use named variables, but I want this to be impose # as little an additional performance hit as possible. sub _handle_element_start { $_[1] =~ tr/-:./__/; ( $_[0]->can( 'start_' . $_[1] ) \|\| return )->( $_[0], $_[2] ); } sub _handle_text { ( $_[0]->can( 'handle_text' ) \|\| return )->( @_ ); } sub _handle_element_end { $_[1] =~ tr/-:./__/; ( $_[0]->can( 'end_' . $_[1] ) \|\| return )->( $_[0], $_[2] ); } 1; __END__ =head1 NAME Pod::Simple::Methody -- turn Pod::Simple events into method calls =head1 SYNOPSIS require 5; use strict; package SomePodFormatter; use base qw(Pod::Simple::Methody); sub handle_text { my($self, $text) = @_; ... } sub start_head1 { my($self, $attrs) = @_; ... } sub end_head1 { my($self) = @_; ... } ...and start_/end_ methods for whatever other events you want to catch. =head1 DESCRIPTION This class is of interest to people writing Pod formatters based on Pod::Simple. This class (which is very small -- read the source) overrides Pod::Simple's _handle_element_start, _handle_text, and _handle_element_end methods so that parser events are turned into method calls. (Otherwise, this is a subclass of L<Pod::Simple> and inherits all its methods.) You can use this class as the base class for a Pod formatter/processor. =head1 METHOD CALLING When Pod::Simple sees a "=head1 Hi there", for example, it basically does this: $parser->_handle_element_start( "head1", \%attributes ); $parser->_handle_text( "Hi there" ); $parser->_handle_element_end( "head1" ); But if you subclass Pod::Simple::Methody, it will instead do this when it sees a "=head1 Hi there": $parser->start_head1( \%attributes ) if $parser->can('start_head1'); $parser->handle_text( "Hi there" ) if $parser->can('handle_text'); $parser->end_head1() if $parser->can('end_head1'); If Pod::Simple sends an event where the element name has a dash, period, or colon, the corresponding method name will have a underscore in its place. For example, "foo.bar:baz" becomes start_foo_bar_baz and end_foo_bar_baz. See the source for Pod::Simple::Text for an example of using this class. =head1 SEE ALSO L<Pod::Simple>, L<Pod::Simple::Subclassing> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item * Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\�((�� DumpAsText.pmnu��[�� require 5; package Pod::Simple::DumpAsText; $VERSION = '3.42'; use Pod::Simple (); BEGIN {@ISA = ('Pod::Simple')} use strict; use Carp (); BEGIN { DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG } sub new { my $self = shift; my $new = $self->SUPER::new(@_); $new->{'output_fh'} \|\|= STDOUT{IO}; $new->accept_codes('VerbatimFormatted'); $new->keep_encoding_directive(1); return $new; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ sub _handle_element_start { # ($self, $element_name, $attr_hash_r) my $fh = $_[0]{'output_fh'}; my($key, $value); DEBUG and print STDERR "++ $_[1]\n"; print $fh ' ' x ($_[0]{'indent'} \|\| 0), "++", $_[1], "\n"; $_[0]{'indent'}++; while(($key,$value) = each %{$_[2]}) { unless($key =~ m/^~/s) { next if $key eq 'start_line' and $_[0]{'hide_line_numbers'}; _perly_escape($key); _perly_escape($value); printf $fh qq{%s \\ "%s" => "%s"\n}, ' ' x ($_[0]{'indent'} \|\| 0), $key, $value; } } return; } sub _handle_text { DEBUG and print STDERR "== \"$_[1]\"\n"; if(length $_[1]) { my $indent = ' ' x $_[0]{'indent'}; my $text = $_[1]; _perly_escape($text); $text =~ # A not-totally-brilliant wrapping algorithm: s/( [^\n]{55} # Snare some characters from a line [^\n\ ]{0,50} # and finish any current word ) \ {1,10}(?!\n) # capture some spaces not at line-end /$1"\n$indent . "/gx # => line-break here ; print {$_[0]{'output_fh'}} $indent, '* "', $text, "\"\n"; } return; } sub _handle_element_end { DEBUG and print STDERR "-- $_[1]\n"; print {$_[0]{'output_fh'}} ' ' x --$_[0]{'indent'}, "--", $_[1], "\n"; return; } # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . sub _perly_escape { foreach my $x (@_) { $x =~ s/([^\x00-\xFF])/sprintf'\x{%X}',ord($1)/eg; # Escape things very cautiously: $x =~ s/([^-\n\t \&\<\>\'!\#\%\\+,\.\/\:\;=\?\~\[\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf'\x%02X',ord($1)/eg; } return; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1; __END__ =head1 NAME Pod::Simple::DumpAsText -- dump Pod-parsing events as text =head1 SYNOPSIS perl -MPod::Simple::DumpAsText -e \ "exit Pod::Simple::DumpAsText->filter(shift)->any_errata_seen" \ thingy.pod =head1 DESCRIPTION This class is for dumping, as text, the events gotten from parsing a Pod document. This class is of interest to people writing Pod formatters based on Pod::Simple. It is useful for seeing exactly what events you get out of some Pod that you feed in. This is a subclass of L<Pod::Simple> and inherits all its methods. =head1 SEE ALSO L<Pod::Simple::DumpAsXML> L<Pod::Simple> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\��o;��LinkSection.pmnu��[�� require 5; package Pod::Simple::LinkSection; # Based somewhat dimly on Array::Autojoin use strict; use Pod::Simple::BlackBox; use vars qw($VERSION ); $VERSION = '3.42'; use overload( # So it'll stringify nice '""' => \&Pod::Simple::BlackBox::stringify_lol, 'bool' => \&Pod::Simple::BlackBox::stringify_lol, # '.=' => \&tack_on, # grudgingly support 'fallback' => 1, # turn on cleverness ); sub tack_on { $_[0] = ['', {}, "$_[0]" ]; return $_[0][2] .= $_[1]; } sub as_string { goto &Pod::Simple::BlackBox::stringify_lol; } sub stringify { goto &Pod::Simple::BlackBox::stringify_lol; } sub new { my $class = shift; $class = ref($class) \|\| $class; my $new; if(@_ == 1) { if (!ref($_[0] \|\| '')) { # most common case: one bare string return bless ['', {}, $_[0] ], $class; } elsif( ref($_[0] \|\| '') eq 'ARRAY') { $new = [ @{ $_[0] } ]; } else { Carp::croak( "$class new() doesn't know to clone $new" ); } } else { # misc stuff $new = [ '', {}, @_ ]; } # By now it's a treelet: [ 'foo', {}, ... ] foreach my $x (@$new) { if(ref($x \|\| '') eq 'ARRAY') { $x = $class->new($x); # recurse } elsif(ref($x \|\| '') eq 'HASH') { $x = { %$x }; } # otherwise leave it. } return bless $new, $class; } # Not much in this class is likely to be link-section specific -- # but it just so happens that link-sections are about the only treelets # that are exposed to the user. 1; __END__ # TODO: let it be an option whether a given subclass even wants little treelets? __END__ =head1 NAME Pod::Simple::LinkSection -- represent "section" attributes of L codes =head1 SYNOPSIS # a long story =head1 DESCRIPTION This class is not of interest to general users. Pod::Simple uses this class for representing the value of the "section" attribute of "L" start-element events. Most applications can just use the normal stringification of objects of this class; they stringify to just the text content of the section, such as "foo" for C<< LZ<><Stuff/foo> >>, and "bar" for C<< LZ<><Stuff/bIZ<><ar>> >>. However, anyone particularly interested in getting the full value of the treelet, can just traverse the content of the treeleet @$treelet_object. To wit: % perl -MData::Dumper -e "use base qw(Pod::Simple::Methody); sub start_L { print Dumper($_[1]{'section'} ) } __PACKAGE__->new->parse_string_document('=head1 L<Foo/bI<ar>baz>>') " Output: $VAR1 = bless( [ '', {}, 'b', bless( [ 'I', {}, 'ar' ], 'Pod::Simple::LinkSection' ), 'baz' ], 'Pod::Simple::LinkSection' ); But stringify it and you get just the text content: % perl -MData::Dumper -e "use base qw(Pod::Simple::Methody); sub start_L { print Dumper( '' . $_[1]{'section'} ) } __PACKAGE__->new->parse_string_document('=head1 L<Foo/bI<ar>baz>>') " Output: $VAR1 = 'barbaz'; =head1 SEE ALSO L<Pod::Simple> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2004 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item * Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\l�<�� TextContent.pmnu��[�� require 5; package Pod::Simple::TextContent; use strict; use Carp (); use Pod::Simple (); use vars qw( @ISA $VERSION ); $VERSION = '3.42'; @ISA = ('Pod::Simple'); sub new { my $self = shift; my $new = $self->SUPER::new(@_); $new->{'output_fh'} \|\|= STDOUT{IO}; $new->nix_X_codes(1); return $new; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub _handle_element_start { print {$_[0]{'output_fh'}} "\n" unless $_[1] =~ m/^[A-Z]$/s; return; } sub _handle_text { $_[1] =~ s/$Pod::Simple::shy//g; $_[1] =~ s/$Pod::Simple::nbsp/ /g; print {$_[0]{'output_fh'}} $_[1]; return; } sub _handle_element_end { print {$_[0]{'output_fh'}} "\n" unless $_[1] =~ m/^[A-Z]$/s; return; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1; __END__ =head1 NAME Pod::Simple::TextContent -- get the text content of Pod =head1 SYNOPSIS TODO perl -MPod::Simple::TextContent -e \ "exit Pod::Simple::TextContent->filter(shift)->any_errata_seen" \ thingy.pod =head1 DESCRIPTION This class is that parses Pod and dumps just the text content. It is mainly meant for use by the Pod::Simple test suite, but you may find some other use for it. This is a subclass of L<Pod::Simple> and inherits all its methods. =head1 SEE ALSO L<Pod::Simple>, L<Pod::Simple::Text>, L<Pod::Spell> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\-�8��8�� Checker.pmnu��[�� # A quite dimwitted pod2plaintext that need only know how to format whatever # text comes out of Pod::BlackBox's _gen_errata require 5; package Pod::Simple::Checker; use strict; use Carp (); use Pod::Simple::Methody (); use Pod::Simple (); use vars qw( @ISA $VERSION ); $VERSION = '3.42'; @ISA = ('Pod::Simple::Methody'); BEGIN { DEBUG = defined(&Pod::Simple::DEBUG) ? \&Pod::Simple::DEBUG : sub() {0} } use Text::Wrap 98.112902 (); # was 2001.0131, but I don't think we need that $Text::Wrap::wrap = 'overflow'; #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub any_errata_seen { # read-only accessor return $_[1]->{'Errata_seen'}; } sub new { my $self = shift; my $new = $self->SUPER::new(@_); $new->{'output_fh'} \|\|= STDOUT{IO}; $new->nix_X_codes(1); $new->nbsp_for_S(1); $new->{'Thispara'} = ''; $new->{'Indent'} = 0; $new->{'Indentstring'} = ' '; $new->{'Errata_seen'} = 0; return $new; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub handle_text { $_[0]{'Errata_seen'} and $_[0]{'Thispara'} .= $_[1] } sub start_Para { $_[0]{'Thispara'} = '' } sub start_head1 { if($_[0]{'Errata_seen'}) { $_[0]{'Thispara'} = ''; } else { if($_[1]{'errata'}) { # start of errata! $_[0]{'Errata_seen'} = 1; $_[0]{'Thispara'} = $_[0]{'source_filename'} ? "$_[0]{'source_filename'} -- " : '' } } } sub start_head2 { $_[0]{'Thispara'} = '' } sub start_head3 { $_[0]{'Thispara'} = '' } sub start_head4 { $_[0]{'Thispara'} = '' } sub start_Verbatim { $_[0]{'Thispara'} = '' } sub start_item_bullet { $_[0]{'Thispara'} = '* ' } sub start_item_number { $_[0]{'Thispara'} = "$_[1]{'number'}. " } sub start_item_text { $_[0]{'Thispara'} = '' } sub start_over_bullet { ++$_[0]{'Indent'} } sub start_over_number { ++$_[0]{'Indent'} } sub start_over_text { ++$_[0]{'Indent'} } sub start_over_block { ++$_[0]{'Indent'} } sub end_over_bullet { --$_[0]{'Indent'} } sub end_over_number { --$_[0]{'Indent'} } sub end_over_text { --$_[0]{'Indent'} } sub end_over_block { --$_[0]{'Indent'} } # . . . . . Now the actual formatters: sub end_head1 { $_[0]->emit_par(-4) } sub end_head2 { $_[0]->emit_par(-3) } sub end_head3 { $_[0]->emit_par(-2) } sub end_head4 { $_[0]->emit_par(-1) } sub end_Para { $_[0]->emit_par( 0) } sub end_item_bullet { $_[0]->emit_par( 0) } sub end_item_number { $_[0]->emit_par( 0) } sub end_item_text { $_[0]->emit_par(-2) } sub emit_par { return unless $_[0]{'Errata_seen'}; my($self, $tweak_indent) = splice(@_,0,2); my $length = 2 * $self->{'Indent'} + ($tweak_indent\|\|0); my $indent = ' ' x ($length > 0 ? $length : 0); # Yes, 'STRING' x NEGATIVE gives '', same as 'STRING' x 0 # 'Negative repeat count does nothing' since 5.22 $self->{'Thispara'} =~ s/$Pod::Simple::shy//g; my $out = Text::Wrap::wrap($indent, $indent, $self->{'Thispara'} .= "\n"); $out =~ s/$Pod::Simple::nbsp/ /g; print {$self->{'output_fh'}} $out, #"\n" ; $self->{'Thispara'} = ''; return; } # . . . . . . . . . . And then off by its lonesome: sub end_Verbatim { return unless $_[0]{'Errata_seen'}; my $self = shift; $self->{'Thispara'} =~ s/$Pod::Simple::nbsp/ /g; $self->{'Thispara'} =~ s/$Pod::Simple::shy//g; my $i = ' ' x ( 2 * $self->{'Indent'} + 4); $self->{'Thispara'} =~ s/^/$i/mg; print { $self->{'output_fh'} } '', $self->{'Thispara'}, "\n\n" ; $self->{'Thispara'} = ''; return; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1; __END__ =head1 NAME Pod::Simple::Checker -- check the Pod syntax of a document =head1 SYNOPSIS perl -MPod::Simple::Checker -e \ "exit Pod::Simple::Checker->filter(shift)->any_errata_seen" \ thingy.pod =head1 DESCRIPTION This class is for checking the syntactic validity of Pod. It works by basically acting like a simple-minded version of L<Pod::Simple::Text> that formats only the "Pod Errors" section (if Pod::Simple even generates one for the given document). This is a subclass of L<Pod::Simple> and inherits all its methods. =head1 SEE ALSO L<Pod::Simple>, L<Pod::Simple::Text>, L<Pod::Checker> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item * Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\�E�V� �� HTMLLegacy.pmnu��[�� require 5; package Pod::Simple::HTMLLegacy; use strict; use vars qw($VERSION); use Getopt::Long; $VERSION = "5.01"; #-------------------------------------------------------------------------- # # This class is meant to thinly emulate bad old Pod::Html # # TODO: some basic docs sub pod2html { my @args = (@_); my( $verbose, $infile, $outfile, $title ); my $index = 1; { my($help); my($netscape); # dummy local @ARGV = @args; GetOptions( "help" => \$help, "verbose!" => \$verbose, "infile=s" => \$infile, "outfile=s" => \$outfile, "title=s" => \$title, "index!" => \$index, "netscape!" => \$netscape, ) or return bad_opts(@args); bad_opts(@args) if @ARGV; # it should be all switches! return help_message() if $help; } for($infile, $outfile) { $_ = undef unless defined and length } if($verbose) { warn sprintf "%s version %s\n", __PACKAGE__, $VERSION; warn "OK, processed args [@args] ...\n"; warn sprintf " Verbose: %s\n Index: %s\n Infile: %s\n Outfile: %s\n Title: %s\n", map defined($_) ? $_ : "(nil)", $verbose, $index, $infile, $outfile, $title, ; Pod::Simple::HTML::DEBUG = sub(){1}; } require Pod::Simple::HTML; Pod::Simple::HTML->VERSION(3); die "No such input file as $infile\n" if defined $infile and ! -e $infile; my $pod = Pod::Simple::HTML->new; $pod->force_title($title) if defined $title; $pod->index($index); return $pod->parse_from_file($infile, $outfile); } #-------------------------------------------------------------------------- sub bad_opts { die _help_message(); } sub help_message { print STDOUT _help_message() } #-------------------------------------------------------------------------- sub _help_message { join '', "[", __PACKAGE__, " version ", $VERSION, qq~] Usage: pod2html --help --infile=<name> --outfile=<name> --verbose --index --noindex Options: --help - prints this message. --[no]index - generate an index at the top of the resulting html (default behavior). --infile - filename for the pod to convert (input taken from stdin by default). --outfile - filename for the resulting html file (output sent to stdout by default). --title - title that will appear in resulting html file. --[no]verbose - self-explanatory (off by default). Note that pod2html is DEPRECATED, and this version implements only some of the options known to older versions. For more information, see 'perldoc pod2html'. ~; } 1; __END__ OVER the underpass! UNDER the overpass! Around the FUTURE and BEYOND REPAIR!! PK��]"\��\��TranscodeSmart.pmnu��[�� require 5; use 5.008; ## Anything before 5.8.0 is GIMPY! ## This module is to be use()'d only by Pod::Simple::Transcode package Pod::Simple::TranscodeSmart; use strict; use Pod::Simple; require Encode; use vars qw($VERSION ); $VERSION = '3.42'; sub is_dumb {0} sub is_smart {1} sub all_encodings { return Encode::->encodings(':all'); } sub encoding_is_available { return Encode::resolve_alias($_[1]); } sub encmodver { return "Encode.pm v" .($Encode::VERSION \|\| '?'); } sub make_transcoder { my $e = Encode::find_encoding($_[1]); die "WHAT ENCODING!?!?" unless $e; my $x; return sub { foreach $x (@_) { $x = $e->decode($x) unless Encode::is_utf8($x); } return; }; } 1; PK��]"\hª�Ǉ��Ǉ��HTML.pmnu��[��require 5; package Pod::Simple::HTML; use strict; use Pod::Simple::PullParser (); use vars qw( @ISA %Tagmap $Computerese $LamePad $Linearization_Limit $VERSION $Perldoc_URL_Prefix $Perldoc_URL_Postfix $Man_URL_Prefix $Man_URL_Postfix $Title_Prefix $Title_Postfix $HTML_EXTENSION %ToIndex $Doctype_decl $Content_decl ); @ISA = ('Pod::Simple::PullParser'); $VERSION = '3.42'; BEGIN { if(defined &DEBUG) { } # no-op elsif( defined &Pod::Simple::DEBUG ) { DEBUG = \&Pod::Simple::DEBUG } else { DEBUG = sub () {0}; } } $Doctype_decl \|\|= ''; # No. Just No. Don't even ask me for it. # qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" # "http://www.w3.org/TR/html4/loose.dtd">\n}; $Content_decl \|\|= q{<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" >}; $HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION; $Computerese = "" unless defined $Computerese; $LamePad = '' unless defined $LamePad; $Linearization_Limit = 120 unless defined $Linearization_Limit; # headings/items longer than that won't get an <a name="..."> $Perldoc_URL_Prefix = 'https://metacpan.org/pod/' unless defined $Perldoc_URL_Prefix; $Perldoc_URL_Postfix = '' unless defined $Perldoc_URL_Postfix; $Man_URL_Prefix = 'http://man.he.net/man'; $Man_URL_Postfix = ''; $Title_Prefix = '' unless defined $Title_Prefix; $Title_Postfix = '' unless defined $Title_Postfix; %ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text # 'item-text' stuff in the index doesn't quite work, and may # not be a good idea anyhow. __PACKAGE__->_accessorize( 'perldoc_url_prefix', # In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what # to put before the "Foo%3a%3aBar". # (for singleton mode only?) 'perldoc_url_postfix', # what to put after "Foo%3a%3aBar" in the URL. Normally "". 'man_url_prefix', # In turning L<crontab(5)> into http://whatever/man/1/crontab, what # to put before the "1/crontab". 'man_url_postfix', # what to put after the "1/crontab" in the URL. Normally "". 'batch_mode', # whether we're in batch mode 'batch_mode_current_level', # When in batch mode, how deep the current module is: 1 for "LWP", # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc 'title_prefix', 'title_postfix', # What to put before and after the title in the head. # Should already be &-escaped 'html_h_level', 'html_header_before_title', 'html_header_after_title', 'html_footer', 'top_anchor', 'index', # whether to add an index at the top of each page # (actually it's a table-of-contents, but we'll call it an index, # out of apparently longstanding habit) 'html_css', # URL of CSS file to point to 'html_javascript', # URL of Javascript file to point to 'force_title', # should already be &-escaped 'default_title', # should already be &-escaped ); #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ my @_to_accept; %Tagmap = ( 'Verbatim' => "\n<pre$Computerese>", '/Verbatim' => "</pre>\n", 'VerbatimFormatted' => "\n<pre$Computerese>", '/VerbatimFormatted' => "</pre>\n", 'VerbatimB' => "<b>", '/VerbatimB' => "</b>", 'VerbatimI' => "<i>", '/VerbatimI' => "</i>", 'VerbatimBI' => "<b><i>", '/VerbatimBI' => "</i></b>", 'Data' => "\n", '/Data' => "\n", 'head1' => "\n<h1>", # And also stick in an <a name="..."> 'head2' => "\n<h2>", # '' 'head3' => "\n<h3>", # '' 'head4' => "\n<h4>", # '' 'head5' => "\n<h5>", # '' 'head6' => "\n<h6>", # '' '/head1' => "</a></h1>\n", '/head2' => "</a></h2>\n", '/head3' => "</a></h3>\n", '/head4' => "</a></h4>\n", '/head5' => "</a></h5>\n", '/head6' => "</a></h6>\n", 'X' => "<!--\n\tINDEX: ", '/X' => "\n-->", changes(qw( Para=p B=b I=i over-bullet=ul over-number=ol over-text=dl over-block=blockquote item-bullet=li item-number=li item-text=dt )), changes2( map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ } qw[ sample=samp definition=dfn keyboard=kbd variable=var citation=cite abbreviation=abbr acronym=acronym subscript=sub superscript=sup big=big small=small underline=u strikethrough=s preformat=pre teletype=tt ] # no point in providing a way to get <q>...</q>, I think ), '/item-bullet' => "</li>$LamePad\n", '/item-number' => "</li>$LamePad\n", '/item-text' => "</a></dt>$LamePad\n", 'item-body' => "\n<dd>", '/item-body' => "</dd>\n", 'B' => "<b>", '/B' => "</b>", 'I' => "<i>", '/I' => "</i>", 'F' => "<em$Computerese>", '/F' => "</em>", 'C' => "<code$Computerese>", '/C' => "</code>", 'L' => "<a href='YOU_SHOULD_NEVER_SEE_THIS'>", # ideally never used! '/L' => "</a>", ); sub changes { return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s ? ( $1, => "\n<$2>", "/$1", => "</$2>\n" ) : die "Funky $_" } @_; } sub changes2 { return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s ? ( $1, => "<$2>", "/$1", => "</$2>" ) : die "Funky $_" } @_; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 } # Just so we can run from the command line. No options. # For that, use perldoc! #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub new { my $new = shift->SUPER::new(@_); #$new->nix_X_codes(1); $new->nbsp_for_S(1); $new->accept_targets( 'html', 'HTML' ); $new->accept_codes('VerbatimFormatted'); $new->accept_codes(@_to_accept); DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n"; $new->perldoc_url_prefix( $Perldoc_URL_Prefix ); $new->perldoc_url_postfix( $Perldoc_URL_Postfix ); $new->man_url_prefix( $Man_URL_Prefix ); $new->man_url_postfix( $Man_URL_Postfix ); $new->title_prefix( $Title_Prefix ); $new->title_postfix( $Title_Postfix ); $new->html_header_before_title( qq[$Doctype_decl<html><head><title>] ); $new->html_header_after_title( join "\n" => "</title>", $Content_decl, "</head>\n<body class='pod'>", $new->version_tag_comment, "<!-- start doc -->\n", ); $new->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] ); $new->top_anchor( "<a name='___top' class='dummyTopAnchor' ></a>\n" ); $new->{'Tagmap'} = {%Tagmap}; return $new; } sub __adjust_html_h_levels { my ($self) = @_; my $Tagmap = $self->{'Tagmap'}; my $add = $self->html_h_level; return unless defined $add; return if ($self->{'Adjusted_html_h_levels'}\|\|0) == $add; $add -= 1; for (1 .. 6) { $Tagmap->{"head$_"} =~ s/$_/$_ + $add/e; $Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e; } } sub batch_mode_page_object_init { my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_; DEBUG and print STDERR "Initting $self\n for $module\n", " in $infile\n out $outfile\n depth $depth\n"; $self->batch_mode(1); $self->batch_mode_current_level($depth); return $self; } sub run { my $self = $_[0]; return $self->do_middle if $self->bare_output; return $self->do_beginning && $self->do_middle && $self->do_end; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub do_beginning { my $self = $_[0]; my $title; if(defined $self->force_title) { $title = $self->force_title; DEBUG and print STDERR "Forcing title to be $title\n"; } else { # Actually try looking for the title in the document: $title = $self->get_short_title(); unless($self->content_seen) { DEBUG and print STDERR "No content seen in search for title.\n"; return; } $self->{'Title'} = $title; if(defined $title and $title =~ m/\S/) { $title = $self->title_prefix . esc($title) . $self->title_postfix; } else { $title = $self->default_title; $title = '' unless defined $title; DEBUG and print STDERR "Title defaults to $title\n"; } } my $after = $self->html_header_after_title \|\| ''; if($self->html_css) { my $link = $self->html_css =~ m/</ ? $self->html_css # It's a big blob of markup, let's drop it in : sprintf( # It's just a URL, so let's wrap it up qq[<link rel="stylesheet" type="text/css" title="pod_stylesheet" href="%s">\n], $self->html_css, ); $after =~ s{(</head>)}{$link\n$1}i; # otherwise nevermind } $self->_add_top_anchor(\$after); if($self->html_javascript) { my $link = $self->html_javascript =~ m/</ ? $self->html_javascript # It's a big blob of markup, let's drop it in : sprintf( # It's just a URL, so let's wrap it up qq[<script type="text/javascript" src="%s"></script>\n], $self->html_javascript, ); $after =~ s{(</head>)}{$link\n$1}i; # otherwise nevermind } print {$self->{'output_fh'}} $self->html_header_before_title \|\| '', $title, # already escaped $after, ; DEBUG and print STDERR "Returning from do_beginning...\n"; return 1; } sub _add_top_anchor { my($self, $text_r) = @_; unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack $$text_r .= $self->top_anchor \|\| ''; } return; } sub version_tag_comment { my $self = shift; return sprintf "<!--\n generated by %s v%s,\n using %s v%s,\n under Perl v%s at %s GMT.\n\n %s\n\n-->\n", esc( ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(), $], scalar(gmtime($ENV{SOURCE_DATE_EPOCH} \|\| time)), ), $self->_modnote(), ; } sub _modnote { my $class = ref($_[0]) \|\| $_[0]; return join "\n " => grep m/\S/, split "\n", qq{ If you want to change this HTML document, you probably shouldn't do that by changing it directly. Instead, see about changing the calling options to $class, and/or subclassing $class, then reconverting this document from the Pod source. When in doubt, email the author of $class for advice. See 'perldoc $class' for more info. }; } sub do_end { my $self = $_[0]; print {$self->{'output_fh'}} $self->html_footer \|\| ''; return 1; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Normally this would just be a call to _do_middle_main_loop -- but we # have to do some elaborate things to emit all the content and then # summarize it and output it /before/ the content that it's a summary of. sub do_middle { my $self = $_[0]; return $self->_do_middle_main_loop unless $self->index; if( $self->output_string ) { # An efficiency hack my $out = $self->output_string; #it's a reference to it my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n"; $$out .= $sneakytag; $self->_do_middle_main_loop; $sneakytag = quotemeta($sneakytag); my $index = $self->index_as_html(); if( $$out =~ s/$sneakytag/$index/s ) { # Expected case DEBUG and print STDERR "Inserted ", length($index), " bytes of index HTML into $out.\n"; } else { DEBUG and print STDERR "Odd, couldn't find where to insert the index in the output!\n"; # I don't think this should ever happen. } return 1; } unless( $self->output_fh ) { require Carp; Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that."); } # If we get here, we're outputting to a FH. So we need to do some magic. # Namely, divert all content to a string, which we output after the index. my $fh = $self->output_fh; my $content = ''; { # Our horrible bait and switch: $self->output_string( \$content ); $self->_do_middle_main_loop; $self->abandon_output_string(); $self->output_fh($fh); } print $fh $self->index_as_html(); print $fh $content; return 1; } ########################################################################### sub index_as_html { my $self = $_[0]; # This is meant to be called AFTER the input document has been parsed! my $points = $self->{'PSHTML_index_points'} \|\| []; @$points > 1 or return qq[<div class='indexgroupEmpty'></div>\n]; # There's no point in having a 0-item or 1-item index, I dare say. my(@out) = qq{\n<div class='indexgroup'>}; my $level = 0; my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent); foreach my $p (@$points, ['head0', '(end)']) { ($tagname, $text) = @$p; $anchorname = $self->section_escape($text); if( $tagname =~ m{^head(\d+)$} ) { $target_level = 0 + $1; } else { # must be some kinda list item if($previous_tagname =~ m{^head\d+$} ) { $target_level = $level + 1; } else { $target_level = $level; # no change needed } } # Get to target_level by opening or closing ULs while($level > $target_level) { --$level; push @out, (" " x $level) . "</ul>"; } while($level < $target_level) { ++$level; push @out, (" " x ($level-1)) . "<ul class='indexList indexList$level'>"; } $previous_tagname = $tagname; next unless $level; $indent = ' ' x $level; push @out, sprintf "%s<li class='indexItem indexItem%s'><a href='#%s'>%s</a>", $indent, $level, esc($anchorname), esc($text) ; } push @out, "</div>\n"; return join "\n", @out; } ########################################################################### sub _do_middle_main_loop { my $self = $_[0]; my $fh = $self->{'output_fh'}; my $tagmap = $self->{'Tagmap'}; $self->__adjust_html_h_levels; my($token, $type, $tagname, $linkto, $linktype); my @stack; my $dont_wrap = 0; while($token = $self->get_token) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if( ($type = $token->type) eq 'start' ) { if(($tagname = $token->tagname) eq 'L') { $linktype = $token->attr('type') \|\| 'insane'; $linkto = $self->do_link($token); if(defined $linkto and length $linkto) { esc($linkto); # (Yes, SGML-escaping applies on top of %-escaping! # But it's rarely noticeable in practice.) print $fh qq{<a href="$linkto" class="podlink$linktype"\n>}; } else { print $fh "<a>"; # Yes, an 'a' element with no attributes! } } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) { print $fh $tagmap->{$tagname} \|\| next; my @to_unget; while(1) { push @to_unget, $self->get_token; last if $to_unget[-1]->is_end and $to_unget[-1]->tagname eq $tagname; # TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens) } my $name = $self->linearize_tokens(@to_unget); $name = $self->do_section($name, $token) if defined $name; print $fh "<a "; if ($tagname =~ m/^head\d$/s) { print $fh "class='u'", $self->index ? " href='#___top' title='click to go to top of document'\n" : "\n"; } if(defined $name) { my $esc = esc( $self->section_name_tidy( $name ) ); print $fh qq[name="$esc"]; DEBUG and print STDERR "Linearized ", scalar(@to_unget), " tokens as \"$name\".\n"; push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name] if $ToIndex{ $tagname }; # Obviously, this discards all formatting codes (saving # just their content), but ahwell. } else { # ludicrously long, so nevermind DEBUG and print STDERR "Linearized ", scalar(@to_unget), " tokens, but it was too long, so nevermind.\n"; } print $fh "\n>"; $self->unget_token(@to_unget); } elsif ($tagname eq 'Data') { my $next = $self->get_token; next unless defined $next; unless( $next->type eq 'text' ) { $self->unget_token($next); next; } DEBUG and print STDERR " raw text ", $next->text, "\n"; # The parser sometimes preserves newlines and sometimes doesn't! (my $text = $next->text) =~ s/\n\z//; print $fh $text, "\n"; next; } else { if( $tagname =~ m/^over-/s ) { push @stack, ''; } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) { print $fh $stack[-1]; $stack[-1] = ''; } print $fh $tagmap->{$tagname} \|\| next; ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted" or $tagname eq 'X'; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - } elsif( $type eq 'end' ) { if( ($tagname = $token->tagname) =~ m/^over-/s ) { if( my $end = pop @stack ) { print $fh $end; } } elsif( $tagname =~ m/^item-/s and @stack) { $stack[-1] = $tagmap->{"/$tagname"}; if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) { $self->unget_token($next); if( $next->type eq 'start' ) { print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"}; $stack[-1] = $tagmap->{"/item-body"}; } } next; } print $fh $tagmap->{"/$tagname"} \|\| next; --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X'; # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - } elsif( $type eq 'text' ) { esc($type = $token->text); # reuse $type, why not $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap; print $fh $type; } } return 1; } ########################################################################### # sub do_section { my($self, $name, $token) = @_; return $name; } sub do_link { my($self, $token) = @_; my $type = $token->attr('type'); if(!defined $type) { $self->whine("Typeless L!?", $token->attr('start_line')); } elsif( $type eq 'pod') { return $self->do_pod_link($token); } elsif( $type eq 'url') { return $self->do_url_link($token); } elsif( $type eq 'man') { return $self->do_man_link($token); } else { $self->whine("L of unknown type $type!?", $token->attr('start_line')); } return 'FNORG'; # should never get called } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub do_url_link { return $_[1]->attr('to') } sub do_man_link { my ($self, $link) = @_; my $to = $link->attr('to'); my $frag = $link->attr('section'); return undef unless defined $to and length $to; # should never happen $frag = $self->section_escape($frag) if defined $frag and length($frag .= ''); # (stringify) DEBUG and print STDERR "Resolving \"$to/$frag\"\n\n"; return $self->resolve_man_page_link($to, $frag); } sub do_pod_link { # And now things get really messy... my($self, $link) = @_; my $to = $link->attr('to'); my $section = $link->attr('section'); return undef unless( # should never happen (defined $to and length $to) or (defined $section and length $section) ); $section = $self->section_escape($section) if defined $section and length($section .= ''); # (stringify) DEBUG and printf STDERR "Resolving \"%s\" \"%s\"...\n", $to \|\| "(nil)", $section \|\| "(nil)"; { # An early hack: my $complete_url = $self->resolve_pod_link_by_table($to, $section); if( $complete_url ) { DEBUG > 1 and print STDERR "resolve_pod_link_by_table(T,S) gives ", $complete_url, "\n (Returning that.)\n"; return $complete_url; } else { DEBUG > 4 and print STDERR " resolve_pod_link_by_table(T,S)", " didn't return anything interesting.\n"; } } if(defined $to and length $to) { # Give this routine first hack again my $there = $self->resolve_pod_link_by_table($to); if(defined $there and length $there) { DEBUG > 1 and print STDERR "resolve_pod_link_by_table(T) gives $there\n"; } else { $there = $self->resolve_pod_page_link($to, $section); # (I pass it the section value, but I don't see a # particular reason it'd use it.) DEBUG > 1 and print STDERR "resolve_pod_page_link gives ", $there \|\| "(nil)", "\n"; unless( defined $there and length $there ) { DEBUG and print STDERR "Can't resolve $to\n"; return undef; } # resolve_pod_page_link returning undef is how it # can signal that it gives up on making a link } $to = $there; } #DEBUG and print STDERR "So far [", $to\|\|'nil', "] [", $section\|\|'nil', "]\n"; my $out = (defined $to and length $to) ? $to : ''; $out .= "#" . $section if defined $section and length $section; unless(length $out) { # sanity check DEBUG and printf STDERR "Oddly, couldn't resolve \"%s\" \"%s\"...\n", $to \|\| "(nil)", $section \|\| "(nil)"; return undef; } DEBUG and print STDERR "Resolved to $out\n"; return $out; } # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . sub section_escape { my($self, $section) = @_; return $self->section_url_escape( $self->section_name_tidy($section) ); } sub section_name_tidy { my($self, $section) = @_; $section =~ s/^\s+//; $section =~ s/\s+$//; $section =~ tr/ /_/; if ($] ge 5.006) { $section =~ s/[[:cntrl:][:^ascii:]]//g; # drop crazy characters } elsif ('A' eq chr(65)) { # But not on early EBCDIC $section =~ tr/\x00-\x1F\x80-\x9F//d; } $section = $self->unicode_escape_url($section); $section = '_' unless length $section; return $section; } sub section_url_escape { shift->general_url_escape(@_) } sub pagepath_url_escape { shift->general_url_escape(@_) } sub manpage_url_escape { shift->general_url_escape(@_) } sub general_url_escape { my($self, $string) = @_; $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C', $1/eg; # express Unicode things as urlencode(utf(orig)). # A pretty conservative escaping, behoovey even for query components # of a URL (see RFC 2396) if ($] ge 5.007_003) { $string =~ s/([^-_\.!~()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',utf8::native_to_unicode(ord($1)))/eg; } else { # Is broken for non-ASCII platforms on early perls $string =~ s/([^-_\.!~()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg; } # Yes, stipulate the list without a range, so that this can work right on # all charsets that this module happens to run under. return $string; } #-------------------------------------------------------------------------- # # Oh look, a yawning portal to Hell! Let's play touch football right by it! # sub resolve_pod_page_link { # resolve_pod_page_link must return a properly escaped URL my $self = shift; return $self->batch_mode() ? $self->resolve_pod_page_link_batch_mode(@_) : $self->resolve_pod_page_link_singleton_mode(@_) ; } sub resolve_pod_page_link_singleton_mode { my($self, $it) = @_; return undef unless defined $it and length $it; my $url = $self->pagepath_url_escape($it); $url =~ s{::$}{}s; # probably never comes up anyway $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM? return undef unless length $url; return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix; } sub resolve_pod_page_link_batch_mode { my($self, $to) = @_; DEBUG > 1 and print STDERR " During batch mode, resolving $to ...\n"; my @path = grep length($_), split m/::/s, $to, -1; unless( @path ) { # sanity DEBUG and print STDERR "Very odd! Splitting $to gives (nil)!\n"; return undef; } $self->batch_mode_rectify_path(\@path); my $out = join('/', map $self->pagepath_url_escape($_), @path) . $HTML_EXTENSION; DEBUG > 1 and print STDERR " => $out\n"; return $out; } sub batch_mode_rectify_path { my($self, $pathbits) = @_; my $level = $self->batch_mode_current_level; $level--; # how many levels up to go to get to the root if($level < 1) { unshift @$pathbits, '.'; # just to be pretty } else { unshift @$pathbits, ('..') x $level; } return; } sub resolve_man_page_link { my ($self, $to, $frag) = @_; my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/; return undef unless defined $page and length $page; $section \|\|= 1; return $self->man_url_prefix . "$section/" . $self->manpage_url_escape($page) . $self->man_url_postfix; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub resolve_pod_link_by_table { # A crazy hack to allow specifying custom L<foo> => URL mappings return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut my($self, $to, $section) = @_; # TODO: add a method that actually populates podhtml_LOT from a file? if(defined $section) { $to = '' unless defined $to and length $to; return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef! } else { return $self->{'podhtml_LOT'}{$to}; # quite possibly undef! } return; } ########################################################################### sub linearize_tokens { # self, tokens my $self = shift; my $out = ''; my $t; while($t = shift @_) { if(!ref $t or !UNIVERSAL::can($t, 'is_text')) { $out .= $t; # a string, or some insane thing } elsif($t->is_text) { $out .= $t->text; } elsif($t->is_start and $t->tag eq 'X') { # Ignore until the end of this X<...> sequence: my $x_open = 1; while($x_open) { next if( ($t = shift @_)->is_text ); if( $t->is_start and $t->tag eq 'X') { ++$x_open } elsif($t->is_end and $t->tag eq 'X') { --$x_open } } } } return undef if length $out > $Linearization_Limit; return $out; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub unicode_escape_url { my($self, $string) = @_; $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg; # Turn char 1234 into "(1234)" return $string; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub esc { # a function. if(defined wantarray) { if(wantarray) { @_ = splice @_; # break aliasing } else { my $x = shift; if ($] ge 5.007_003) { $x =~ s/([^-\n\t !\#\$\%\\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg; } else { # Is broken for non-ASCII platforms on early perls $x =~ s/([^-\n\t !\#\$\%\\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg; } return $x; } } foreach my $x (@_) { # Escape things very cautiously: if (defined $x) { if ($] ge 5.007_003) { $x =~ s/([^-\n\t !\#\$\%\\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg } else { # Is broken for non-ASCII platforms on early perls $x =~ s/([^-\n\t !\#\$\%\\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg } } # Leave out "- so that "--" won't make it thru in X-generated comments # with text in them. # Yes, stipulate the list without a range, so that this can work right on # all charsets that this module happens to run under. } return @_; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1; __END__ =head1 NAME Pod::Simple::HTML - convert Pod to HTML =head1 SYNOPSIS perl -MPod::Simple::HTML -e Pod::Simple::HTML::go thingy.pod =head1 DESCRIPTION This class is for making an HTML rendering of a Pod document. This is a subclass of L<Pod::Simple::PullParser> and inherits all its methods (and options). Note that if you want to do a batch conversion of a lot of Pod documents to HTML, you should see the module L<Pod::Simple::HTMLBatch>. =head1 CALLING FROM THE COMMAND LINE TODO perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Thing.pod Thing.html =head1 CALLING FROM PERL =head2 Minimal code use Pod::Simple::HTML; my $p = Pod::Simple::HTML->new; $p->output_string(\my $html); $p->parse_file('path/to/Module/Name.pm'); open my $out, '>', 'out.html' or die "Cannot open 'out.html': $!\n"; print $out $html; =head2 More detailed example use Pod::Simple::HTML; Set the content type: $Pod::Simple::HTML::Content_decl = q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >}; my $p = Pod::Simple::HTML->new; Include a single javascript source: $p->html_javascript('http://abc.com/a.js'); Or insert multiple javascript source in the header (or for that matter include anything, thought this is not recommended) $p->html_javascript(' <script type="text/javascript" src="http://abc.com/b.js"></script> <script type="text/javascript" src="http://abc.com/c.js"></script>'); Include a single css source in the header: $p->html_css('/style.css'); or insert multiple css sources: $p->html_css(' <link rel="stylesheet" type="text/css" title="pod_stylesheet" href="http://remote.server.com/jquery.css"> <link rel="stylesheet" type="text/css" title="pod_stylesheet" href="/style.css">'); Tell the parser where should the output go. In this case it will be placed in the $html variable: my $html; $p->output_string(\$html); Parse and process a file with pod in it: $p->parse_file('path/to/Module/Name.pm'); =head1 METHODS TODO all (most?) accessorized methods The following variables need to be set B<before> the call to the ->new constructor. Set the string that is included before the opening <html> tag: $Pod::Simple::HTML::Doctype_decl = qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n}; Set the content-type in the HTML head: (defaults to ISO-8859-1) $Pod::Simple::HTML::Content_decl = q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >}; Set the value that will be embedded in the opening tags of F, C tags and verbatim text. F maps to <em>, C maps to <code>, Verbatim text maps to <pre> (Computerese defaults to "") $Pod::Simple::HTML::Computerese = ' class="some_class_name'; =head2 html_css =head2 html_javascript =head2 title_prefix =head2 title_postfix =head2 html_header_before_title This includes everything before the <title> opening tag including the Document type and including the opening <title> tag. The following call will set it to be a simple HTML file: $p->html_header_before_title('<html><head><title>'); =head2 top_anchor By default Pod::Simple::HTML adds a dummy anchor at the top of the HTML. You can change it by calling $p->top_anchor('<a name="zz" >'); =head2 html_h_level Normally =head1 will become <h1>, =head2 will become <h2> etc. Using the html_h_level method will change these levels setting the h level of =head1 tags: $p->html_h_level(3); Will make sure that =head1 will become <h3> and =head2 will become <h4> etc... =head2 index Set it to some true value if you want to have an index (in reality a table of contents) to be added at the top of the generated HTML. $p->index(1); =head2 html_header_after_title Includes the closing tag of </title> and through the rest of the head till the opening of the body $p->html_header_after_title('</title>...</head><body id="my_id">'); =head2 html_footer The very end of the document: $p->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] ); =head1 SUBCLASSING Can use any of the methods described above but for further customization one needs to override some of the methods: package My::Pod; use strict; use warnings; use base 'Pod::Simple::HTML'; # needs to return a URL string such # http://some.other.com/page.html # #anchor_in_the_same_file # /internal/ref.html sub do_pod_link { # My::Pod object and Pod::Simple::PullParserStartToken object my ($self, $link) = @_; say $link->tagname; # will be L for links say $link->attr('to'); # say $link->attr('type'); # will be 'pod' always say $link->attr('section'); # Links local to our web site if ($link->tagname eq 'L' and $link->attr('type') eq 'pod') { my $to = $link->attr('to'); if ($to =~ /^Padre::/) { $to =~ s{::}{/}g; return "/docs/Padre/$to.html"; } } # all other links are generated by the parent class my $ret = $self->SUPER::do_pod_link($link); return $ret; } 1; Meanwhile in script.pl: use My::Pod; my $p = My::Pod->new; my $html; $p->output_string(\$html); $p->parse_file('path/to/Module/Name.pm'); open my $out, '>', 'out.html' or die; print $out $html; TODO maybe override do_beginning do_end =head1 SEE ALSO L<Pod::Simple>, L<Pod::Simple::HTMLBatch> TODO: a corpus of sample Pod input and HTML output? Or common idioms? =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002-2004 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 ACKNOWLEDGEMENTS Thanks to L<Hurricane Electric\|http://he.net/> for permission to use its L<Linux man pages online\|http://man.he.net/> site for man page links. Thanks to L<search.cpan.org\|http://search.cpan.org/> for permission to use the site for Perl module links. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item * Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\�-�D��D��PullParserEndToken.pmnu��[�� require 5; package Pod::Simple::PullParserEndToken; use Pod::Simple::PullParserToken (); use strict; use vars qw(@ISA $VERSION); @ISA = ('Pod::Simple::PullParserToken'); $VERSION = '3.42'; sub new { # Class->new(tagname); my $class = shift; return bless ['end', @_], ref($class) \|\| $class; } # Purely accessors: sub tagname { (@_ == 2) ? ($_[0][1] = $_[1]) : $_[0][1] } sub tag { shift->tagname(@_) } # shortcut: sub is_tagname { $_[0][1] eq $_[1] } sub is_tag { shift->is_tagname(@_) } 1; __END__ =head1 NAME Pod::Simple::PullParserEndToken -- end-tokens from Pod::Simple::PullParser =head1 SYNOPSIS (See L<Pod::Simple::PullParser>) =head1 DESCRIPTION When you do $parser->get_token on a L<Pod::Simple::PullParser>, you might get an object of this class. This is a subclass of L<Pod::Simple::PullParserToken> and inherits all its methods, and adds these methods: =over =item $token->tagname This returns the tagname for this end-token object. For example, parsing a "=head1 ..." line will give you a start-token with the tagname of "head1", token(s) for its content, and then an end-token with the tagname of "head1". =item $token->tagname(I<somestring>) This changes the tagname for this end-token object. You probably won't need to do this. =item $token->tag(...) A shortcut for $token->tagname(...) =item $token->is_tag(I<somestring>) or $token->is_tagname(I<somestring>) These are shortcuts for C<< $token->tag() eq I<somestring> >> =back You're unlikely to ever need to construct an object of this class for yourself, but if you want to, call C<< Pod::Simple::PullParserEndToken->new( I<tagname> ) >> =head1 SEE ALSO L<Pod::Simple::PullParserToken>, L<Pod::Simple>, L<Pod::Simple::Subclassing> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item * Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\Y�ܤ��PullParserToken.pmnu��[�� require 5; package Pod::Simple::PullParserToken; # Base class for tokens gotten from Pod::Simple::PullParser's $parser->get_token @ISA = (); $VERSION = '3.42'; use strict; sub new { # Class->new('type', stuff...); ## Overridden in derived classes anyway my $class = shift; return bless [@_], ref($class) \|\| $class; } sub type { $_[0][0] } # Can't change the type of an object sub dump { Pod::Simple::pretty( [ @{ $_[0] } ] ) } sub is_start { $_[0][0] eq 'start' } sub is_end { $_[0][0] eq 'end' } sub is_text { $_[0][0] eq 'text' } 1; __END__ sub dump { '[' . _esc( @{ $_[0] } ) . ']' } # JUNK: sub _esc { return '' unless @_; my @out; foreach my $in (@_) { push @out, '"' . $in . '"'; $out[-1] =~ s/([^- \:\:\.\,\'\>\<\"\/\=\?\+\\|\[\]\{\}\_a-zA-Z0-9_\`\~\!\#\%\^\&\])/ sprintf( (ord($1) < 256) ? "\\x%02X" : "\\x{%X}", ord($1)) /eg; } return join ', ', @out; } __END__ =head1 NAME Pod::Simple::PullParserToken -- tokens from Pod::Simple::PullParser =head1 SYNOPSIS Given a $parser that's an object of class Pod::Simple::PullParser (or a subclass)... while(my $token = $parser->get_token) { $DEBUG and print STDERR "Token: ", $token->dump, "\n"; if($token->is_start) { ...access $token->tagname, $token->attr, etc... } elsif($token->is_text) { ...access $token->text, $token->text_r, etc... } elsif($token->is_end) { ...access $token->tagname... } } (Also see L<Pod::Simple::PullParser>) =head1 DESCRIPTION When you do $parser->get_token on a L<Pod::Simple::PullParser>, you should get an object of a subclass of Pod::Simple::PullParserToken. Subclasses will add methods, and will also inherit these methods: =over =item $token->type This returns the type of the token. This will be either the string "start", the string "text", or the string "end". Once you know what the type of an object is, you then know what subclass it belongs to, and therefore what methods it supports. Yes, you could probably do the same thing with code like $token->isa('Pod::Simple::PullParserEndToken'), but that's not so pretty as using just $token->type, or even the following shortcuts: =item $token->is_start This is a shortcut for C<< $token->type() eq "start" >> =item $token->is_text This is a shortcut for C<< $token->type() eq "text" >> =item $token->is_end This is a shortcut for C<< $token->type() eq "end" >> =item $token->dump This returns a handy stringified value of this object. This is useful for debugging, as in: while(my $token = $parser->get_token) { $DEBUG and print STDERR "Token: ", $token->dump, "\n"; ... } =back =head1 SEE ALSO My subclasses: L<Pod::Simple::PullParserStartToken>, L<Pod::Simple::PullParserTextToken>, and L<Pod::Simple::PullParserEndToken>. L<Pod::Simple::PullParser> and L<Pod::Simple> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\�{X� �� TiedOutFH.pmnu��[�� use strict; package Pod::Simple::TiedOutFH; use Symbol ('gensym'); use Carp (); use vars qw($VERSION ); $VERSION = '3.42'; #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub handle_on { # some horrible frightening things are encapsulated in here my $class = shift; $class = ref($class) \|\| $class; Carp::croak "Usage: ${class}->handle_on(\$somescalar)" unless @_; my $x = (defined($_[0]) and ref($_[0])) ? $_[0] : ( \( $_[0] ) )[0] ; $$x = '' unless defined $$x; #Pod::Simple::DEBUG and print STDERR "New $class handle on $x = \"$$x\"\n"; my $new = gensym(); tie $new, $class, $x; return $new; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub TIEHANDLE { # Ties to just a scalar ref my($class, $scalar_ref) = @_; $$scalar_ref = '' unless defined $$scalar_ref; return bless \$scalar_ref, ref($class) \|\| $class; } sub PRINT { my $it = shift; foreach my $x (@_) { $$$it .= $x } #Pod::Simple::DEBUG > 10 and print STDERR " appended to $$it = \"$$$it\"\n"; return 1; } sub FETCH { return ${$_[0]}; } sub PRINTF { my $it = shift; my $format = shift; $$$it .= sprintf $format, @_; return 1; } sub FILENO { ${ $_[0] } + 100 } # just to produce SOME number sub CLOSE { 1 } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1; __END__ Chole 1 large red onion * 2 tomatillos * 4 or 5 roma tomatoes (optionally with the pulp discarded) * 1 tablespoons chopped ginger root (or more, to taste) * 2 tablespoons canola oil (or vegetable oil) * 1 tablespoon garam masala * 1/2 teaspoon red chili powder, or to taste * Salt, to taste (probably quite a bit) * 2 (15-ounce) cans chick peas or garbanzo beans, drained and rinsed * juice of one smallish lime * a dash of balsamic vinegar (to taste) * cooked rice, preferably long-grain white rice (whether plain, basmati rice, jasmine rice, or even a mild pilaf) In a blender or food processor, puree the onions, tomatoes, tomatillos, and ginger root. You can even do it with a Braun hand "mixer", if you chop things finer to start with, and work at it. In a saucepan set over moderate heat, warm the oil until hot. Add the puree and the balsamic vinegar, and cook, stirring occasionally, for 20 to 40 minutes. (Cooking it longer will make it sweeter.) Add the Garam Masala, chili powder, and cook, stirring occasionally, for 5 minutes. Add the salt and chick peas and cook, stirring, until heated through. Stir in the lime juice, and optionally one or two teaspoons of tahini. You can let it simmer longer, depending on how much softer you want the garbanzos to get. Serve over rice, like a curry. Yields 5 to 7 servings. PK��]"\,)��Transcode.pmnu��[�� require 5; package Pod::Simple::Transcode; use strict; use vars qw($VERSION @ISA); $VERSION = '3.42'; BEGIN { if(defined &DEBUG) {;} # Okay elsif( defined &Pod::Simple::DEBUG ) { DEBUG = \&Pod::Simple::DEBUG; } else { DEBUG = sub () {0}; } } foreach my $class ( 'Pod::Simple::TranscodeSmart', 'Pod::Simple::TranscodeDumb', '', ) { $class or die "Couldn't load any encoding classes"; DEBUG and print STDERR "About to try loading $class...\n"; eval "require $class;"; if($@) { DEBUG and print STDERR "Couldn't load $class: $@\n"; } else { DEBUG and print STDERR "OK, loaded $class.\n"; @ISA = ($class); last; } } sub _blorp { return; } # just to avoid any "empty class" warning 1; __END__ PK��]"\�C�::��:��XMLOutStream.pmnu��[�� require 5; package Pod::Simple::XMLOutStream; use strict; use Carp (); use Pod::Simple (); use vars qw( $ATTR_PAD @ISA $VERSION $SORT_ATTRS); $VERSION = '3.42'; BEGIN { @ISA = ('Pod::Simple'); DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG; } $ATTR_PAD = "\n" unless defined $ATTR_PAD; # Don't mess with this unless you know what you're doing. $SORT_ATTRS = 0 unless defined $SORT_ATTRS; sub new { my $self = shift; my $new = $self->SUPER::new(@_); $new->{'output_fh'} \|\|= STDOUT{IO}; $new->keep_encoding_directive(1); #$new->accept_codes('VerbatimFormatted'); return $new; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub _handle_element_start { # ($self, $element_name, $attr_hash_r) my $fh = $_[0]{'output_fh'}; my($key, $value); DEBUG and print STDERR "++ $_[1]\n"; print $fh "<", $_[1]; if($SORT_ATTRS) { foreach my $key (sort keys %{$_[2]}) { unless($key =~ m/^~/s) { next if $key eq 'start_line' and $_[0]{'hide_line_numbers'}; _xml_escape($value = $_[2]{$key}); print $fh $ATTR_PAD, $key, '="', $value, '"'; } } } else { # faster while(($key,$value) = each %{$_[2]}) { unless($key =~ m/^~/s) { next if $key eq 'start_line' and $_[0]{'hide_line_numbers'}; _xml_escape($value); print $fh $ATTR_PAD, $key, '="', $value, '"'; } } } print $fh ">"; return; } sub _handle_text { DEBUG and print STDERR "== \"$_[1]\"\n"; if(length $_[1]) { my $text = $_[1]; _xml_escape($text); print {$_[0]{'output_fh'}} $text; } return; } sub _handle_element_end { DEBUG and print STDERR "-- $_[1]\n"; print {$_[0]{'output_fh'}} "</", $_[1], ">"; return; } # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ sub _xml_escape { foreach my $x (@_) { # Escape things very cautiously: if ($] ge 5.007_003) { $x =~ s/([^-\n\t !\#\$\%\\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg; } else { # Is broken for non-ASCII platforms on early perls $x =~ s/([^-\n\t !\#\$\%\\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg; } # Yes, stipulate the list without a range, so that this can work right on # all charsets that this module happens to run under. } return; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1; __END__ =head1 NAME Pod::Simple::XMLOutStream -- turn Pod into XML =head1 SYNOPSIS perl -MPod::Simple::XMLOutStream -e \ "exit Pod::Simple::XMLOutStream->filter(shift)->any_errata_seen" \ thingy.pod =head1 DESCRIPTION Pod::Simple::XMLOutStream is a subclass of L<Pod::Simple> that parses Pod and turns it into XML. Pod::Simple::XMLOutStream inherits methods from L<Pod::Simple>. =head1 SEE ALSO L<Pod::Simple::DumpAsXML> is rather like this class; see its documentation for a discussion of the differences. L<Pod::Simple>, L<Pod::Simple::DumpAsXML>, L<Pod::SAX> L<Pod::Simple::Subclassing> The older (and possibly obsolete) libraries L<Pod::PXML>, L<Pod::XML> =head1 ABOUT EXTENDING POD TODO: An example or two of =extend, then point to Pod::Simple::Subclassing =head1 SEE ALSO L<Pod::Simple>, L<Pod::Simple::Text>, L<Pod::Spell> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002-2004 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item * Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\�\|��R��R��BlackBox.pmnu��[��package Pod::Simple::BlackBox; # # "What's in the box?" "Pain." # ########################################################################### # # This is where all the scary things happen: parsing lines into # paragraphs; and then into directives, verbatims, and then also # turning formatting sequences into treelets. # # Are you really sure you want to read this code? # #----------------------------------------------------------------------------- # # The basic work of this module Pod::Simple::BlackBox is doing the dirty work # of parsing Pod into treelets (generally one per non-verbatim paragraph), and # to call the proper callbacks on the treelets. # # Every node in a treelet is a ['name', {attrhash}, ...children...] use integer; # vroom! use strict; use Carp (); use vars qw($VERSION ); $VERSION = '3.42'; #use constant DEBUG => 7; sub my_qr ($$) { # $1 is a pattern to compile and return. Older perls compile any # syntactically valid property, even if it isn't legal. To cope with # this, return an empty string unless the compiled pattern also # successfully matches $2, which the caller furnishes. my ($input_re, $should_match) = @_; # XXX could have a third parameter $shouldnt_match for extra safety my $use_utf8 = ($] le 5.006002) ? 'use utf8;' : ""; my $re = eval "no warnings; $use_utf8 qr/$input_re/"; #print STDERR __LINE__, ": $input_re: $@\n" if $@; return "" if $@; my $matches = eval "no warnings; $use_utf8 '$should_match' =~ /$re/"; #print STDERR __LINE__, ": $input_re: $@\n" if $@; return "" if $@; #print STDERR __LINE__, ": SUCCESS: $re\n" if $matches; return $re if $matches; #print STDERR __LINE__, ": $re: didn't match\n"; return ""; } BEGIN { require Pod::Simple; DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG } # Matches a character iff the character will have a different meaning # if we choose CP1252 vs UTF-8 if there is no =encoding line. # This is broken for early Perls on non-ASCII platforms. my $non_ascii_re = my_qr('[[:^ascii:]]', "\xB6"); $non_ascii_re = qr/[\x80-\xFF]/ unless $non_ascii_re; # Use patterns understandable by Perl 5.6, if possible my $cs_re = do { no warnings; my_qr('\p{IsCs}', "\x{D800}") }; my $cn_re = my_qr('\p{IsCn}', "\x{09E4}"); # <reserved> code point unlikely # to get assigned my $rare_blocks_re = my_qr('[\p{InIPAExtensions}\p{InSpacingModifierLetters}]', "\x{250}"); $rare_blocks_re = my_qr('[\x{0250}-\x{02FF}]', "\x{250}") unless $rare_blocks_re; my $script_run_re = eval 'no warnings "experimental::script_run"; qr/(script_run: ^ .* $ )/x'; my $latin_re = my_qr('[\p{IsLatin}\p{IsInherited}\p{IsCommon}]', "\x{100}"); unless ($latin_re) { # This was machine generated to be the ranges of the union of the above # three properties, with things that were undefined by Unicode 4.1 filling # gaps. That is the version in use when Perl advanced enough to # successfully compile and execute the above pattern. $latin_re = my_qr('[\x00-\x{02E9}\x{02EC}-\x{0374}\x{037E}\x{0385}\x{0387}\x{0485}\x{0486}\x{0589}\x{060C}\x{061B}\x{061F}\x{0640}\x{064B}-\x{0655}\x{0670}\x{06DD}\x{0951}-\x{0954}\x{0964}\x{0965}\x{0E3F}\x{10FB}\x{16EB}-\x{16ED}\x{1735}\x{1736}\x{1802}\x{1803}\x{1805}\x{1D00}-\x{1D25}\x{1D2C}-\x{1D5C}\x{1D62}-\x{1D65}\x{1D6B}-\x{1D77}\x{1D79}-\x{1DBE}\x{1DC0}-\x{1EF9}\x{2000}-\x{2125}\x{2127}-\x{27FF}\x{2900}-\x{2B13}\x{2E00}-\x{2E1D}\x{2FF0}-\x{3004}\x{3006}\x{3008}-\x{3020}\x{302A}-\x{302D}\x{3030}-\x{3037}\x{303C}-\x{303F}\x{3099}-\x{309C}\x{30A0}\x{30FB}\x{30FC}\x{3190}-\x{319F}\x{31C0}-\x{31CF}\x{3220}-\x{325F}\x{327F}-\x{32CF}\x{3358}-\x{33FF}\x{4DC0}-\x{4DFF}\x{A700}-\x{A716}\x{FB00}-\x{FB06}\x{FD3E}\x{FD3F}\x{FE00}-\x{FE6B}\x{FEFF}-\x{FF65}\x{FF70}\x{FF9E}\x{FF9F}\x{FFE0}-\x{FFFD}\x{10100}-\x{1013F}\x{1D000}-\x{1D1DD}\x{1D300}-\x{1D7FF}]', "\x{100}"); } my $every_char_is_latin_re = my_qr("^(?:$latin_re)\\z", "A"); # Latin script code points not in the first release of Unicode my $later_latin_re = my_qr('[^\P{IsLatin}\p{IsAge=1.1}]', "\x{1F6}"); # If this perl doesn't have the Deprecated property, there's only one code # point in it that we need be concerned with. my $deprecated_re = my_qr('\p{IsDeprecated}', "\x{149}"); $deprecated_re = qr/\x{149}/ unless $deprecated_re; my $utf8_bom; if (($] ge 5.007_003)) { $utf8_bom = "\x{FEFF}"; utf8::encode($utf8_bom); } else { $utf8_bom = "\xEF\xBB\xBF"; # No EBCDIC BOM detection for early Perls. } # This is used so that the 'content_seen' method doesn't return true on a # file that just happens to have a line that matches /^=[a-zA-z]/. Only if # there is a valid =foo line will we return that content was seen. my $seen_legal_directive = 0; #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ sub parse_line { shift->parse_lines(@_) } # alias # - - - Turn back now! Run away! - - - sub parse_lines { # Usage: $parser->parse_lines(@lines) # an undef means end-of-stream my $self = shift; my $code_handler = $self->{'code_handler'}; my $cut_handler = $self->{'cut_handler'}; my $wl_handler = $self->{'whiteline_handler'}; $self->{'line_count'} \|\|= 0; my $scratch; DEBUG > 4 and print STDERR "# Parsing starting at line ", $self->{'line_count'}, ".\n"; DEBUG > 5 and print STDERR "# About to parse lines: ", join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n"; my $paras = ($self->{'paras'} \|\|= []); # paragraph buffer. Because we need to defer processing of =over # directives and verbatim paragraphs. We call _ponder_paragraph_buffer # to process this. $self->{'pod_para_count'} \|\|= 0; # An attempt to match the pod portions of a line. This is not fool proof, # but is good enough to serve as part of the heuristic for guessing the pod # encoding if not specified. my $codes = join '', grep { / ^ [A-Za-z] $/x } sort keys %{$self->{accept_codes}}; my $pod_chars_re = qr/ ^ = [A-Za-z]+ \| [\Q$codes\E] < /x; my $line; foreach my $source_line (@_) { if( $self->{'source_dead'} ) { DEBUG > 4 and print STDERR "# Source is dead.\n"; last; } unless( defined $source_line ) { DEBUG > 4 and print STDERR "# Undef-line seen.\n"; push @$paras, ['~end', {'start_line' => $self->{'line_count'}}]; push @$paras, $paras->[-1], $paras->[-1]; # So that it definitely fills the buffer. $self->{'source_dead'} = 1; $self->_ponder_paragraph_buffer; next; } if( $self->{'line_count'}++ ) { ($line = $source_line) =~ tr/\n\r//d; # If we don't have two vars, we'll end up with that there # tr/// modding the (potentially read-only) original source line! } else { DEBUG > 2 and print STDERR "First line: [$source_line]\n"; if( ($line = $source_line) =~ s/^$utf8_bom//s ) { DEBUG and print STDERR "UTF-8 BOM seen. Faking a '=encoding utf8'.\n"; $self->_handle_encoding_line( "=encoding utf8" ); delete $self->{'_processed_encoding'}; $line =~ tr/\n\r//d; } elsif( $line =~ s/^\xFE\xFF//s ) { DEBUG and print STDERR "Big-endian UTF-16 BOM seen. Aborting parsing.\n"; $self->scream( $self->{'line_count'}, "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." ); splice @_; push @_, undef; next; # TODO: implement somehow? } elsif( $line =~ s/^\xFF\xFE//s ) { DEBUG and print STDERR "Little-endian UTF-16 BOM seen. Aborting parsing.\n"; $self->scream( $self->{'line_count'}, "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." ); splice @_; push @_, undef; next; # TODO: implement somehow? } else { DEBUG > 2 and print STDERR "First line is BOM-less.\n"; ($line = $source_line) =~ tr/\n\r//d; } } if(!$self->{'parse_characters'} && !$self->{'encoding'} && ($self->{'in_pod'} \|\| $line =~ /^=/s) && $line =~ /$non_ascii_re/ ) { my $encoding; # No =encoding line, and we are at the first pod line in the input that # contains a non-ascii byte, that is, one whose meaning varies depending # on whether the file is encoded in UTF-8 or CP1252, which are the two # possibilities permitted by the pod spec. (ASCII is assumed if the # file only contains ASCII bytes.) In order to process this line, we # need to figure out what encoding we will use for the file. # # Strictly speaking ISO 8859-1 (Latin 1) refers to the code points # 160-255, but it is used here, as it often colloquially is, to refer to # the complete set of code points 0-255, including ASCII (0-127), the C1 # controls (128-159), and strict Latin 1 (160-255). # # CP1252 is effectively a superset of Latin 1, because it differs only # from colloquial 8859-1 in the C1 controls, which are very unlikely to # actually be present in 8859-1 files, so can be used for other purposes # without conflict. CP 1252 uses most of them for graphic characters. # # Note that all ASCII-range bytes represent their corresponding code # points in both CP1252 and UTF-8. In ASCII platform UTF-8, all other # code points require multiple (non-ASCII) bytes to represent. (A # separate paragraph for EBCDIC is below.) The multi-byte # representation is quite structured. If we find an isolated byte that # would require multiple bytes to represent in UTF-8, we know that the # encoding is not UTF-8. If we find a sequence of bytes that violates # the UTF-8 structure, we also can presume the encoding isn't UTF-8, and # hence must be 1252. # # But there are ambiguous cases where we could guess wrong. If so, the # user will end up having to supply an =encoding line. We use all # readily available information to improve our chances of guessing # right. The odds of something not being UTF-8, but still passing a # UTF-8 validity test go down very rapidly with increasing length of the # sequence. Therefore we look at all non-ascii sequences on the line. # If any of the sequences can't be UTF-8, we quit there and choose # CP1252. If all could be UTF-8, we see if any of the code points # represented are unlikely to be in pod. If so, we guess CP1252. If # not, we check if the line is all in the same script; if not guess # CP1252; otherwise UTF-8. For perls that don't have convenient script # run testing, see if there is both Latin and non-Latin. If so, CP1252, # otherwise UTF-8. # # On EBCDIC platforms, the situation is somewhat different. In # UTF-EBCDIC, not only do ASCII-range bytes represent their code points, # but so do the bytes that are for the C1 controls. Recall that these # correspond to the unused portion of 8859-1 that 1252 mostly takes # over. That means that there are fewer code points that are # represented by multi-bytes. But, note that the these controls are # very unlikely to be in pod text. So if we encounter one of them, it # means that it is quite likely CP1252 and not UTF-8. The net result is # the same code below is used for both platforms. # # XXX probably if the line has E<foo> that evaluates to illegal CP1252, # then it is UTF-8. But we haven't processed E<> yet. goto set_1252 if $] lt 5.006_000; # No UTF-8 on very early perls my $copy; no warnings 'utf8'; if ($] ge 5.007_003) { $copy = $line; # On perls that have this function, we can use it to easily see if the # sequence is valid UTF-8 or not; if valid it turns on the UTF-8 flag # needed below for script run detection goto set_1252 if ! utf8::decode($copy); } elsif (ord("A") != 65) { # Early EBCDIC, assume UTF-8. What's a windows # code page doing here anyway? goto set_utf8; } else { # ASCII, no decode(): do it ourselves using the fundamental # characteristics of UTF-8 use if $] le 5.006002, 'utf8'; my $char_ord; my $needed; # How many continuation bytes to gobble up # Initialize the translated line with a dummy character that will be # deleted after everything else is done. This dummy makes sure that # $copy will be in UTF-8. Doing it now avoids the bugs in early perls # with upgrading in the middle $copy = chr(0x100); # Parse through the line for (my $i = 0; $i < length $line; $i++) { my $byte = substr($line, $i, 1); # ASCII bytes are trivially dealt with if ($byte !~ $non_ascii_re) { $copy .= $byte; next; } my $b_ord = ord $byte; # Now figure out what this code point would be if the input is # actually in UTF-8. If, in the process, we discover that it isn't # well-formed UTF-8, we guess CP1252. # # Start the process. If it is UTF-8, we are at the first, start # byte, of a multi-byte sequence. We look at this byte to figure # out how many continuation bytes are needed, and to initialize the # code point accumulator with the data from this byte. # # Normally the minimum continuation byte is 0x80, but in certain # instances the minimum is a higher number. So the code below # overrides this for those instances. my $min_cont = 0x80; if ($b_ord < 0xC2) { # A start byte < C2 is malformed goto set_1252; } elsif ($b_ord <= 0xDF) { $needed = 1; $char_ord = $b_ord & 0x1F; } elsif ($b_ord <= 0xEF) { $min_cont = 0xA0 if $b_ord == 0xE0; $needed = 2; $char_ord = $b_ord & (0x1F >> 1); } elsif ($b_ord <= 0xF4) { $min_cont = 0x90 if $b_ord == 0xF0; $needed = 3; $char_ord = $b_ord & (0x1F >> 2); } else { # F4 is the highest start byte for legal Unicode; higher is # unlikely to be in pod. goto set_1252; } # ? not enough continuation bytes available goto set_1252 if $i + $needed >= length $line; # Accumulate the ordinal of the character from the remaining # (continuation) bytes. while ($needed-- > 0) { my $cont = substr($line, ++$i, 1); $b_ord = ord $cont; goto set_1252 if $b_ord < $min_cont \|\| $b_ord > 0xBF; # In all cases, any next continuation bytes all have the same # minimum legal value $min_cont = 0x80; # Accumulate this byte's contribution to the code point $char_ord <<= 6; $char_ord \|= ($b_ord & 0x3F); } # Here, the sequence that formed this code point was valid UTF-8, # so add the completed character to the output $copy .= chr $char_ord; } # End of loop through line # Delete the dummy first character $copy = substr($copy, 1); } # Here, $copy is legal UTF-8. # If it can't be legal CP1252, no need to look further. (These bytes # aren't valid in CP1252.) This test could have been placed higher in # the code, but it seemed wrong to set the encoding to UTF-8 without # making sure that the very first instance is well-formed. But what if # it isn't legal CP1252 either? We have to choose one or the other, and # It seems safer to favor the single-byte encoding over the multi-byte. goto set_utf8 if ord("A") == 65 && $line =~ /[\x81\x8D\x8F\x90\x9D]/; # The C1 controls are not likely to appear in pod goto set_1252 if ord("A") == 65 && $copy =~ /[\x80-\x9F]/; # Nor are surrogates nor unassigned, nor deprecated. DEBUG > 8 and print STDERR __LINE__, ": $copy: surrogate\n" if $copy =~ $cs_re; goto set_1252 if $cs_re && $copy =~ $cs_re; DEBUG > 8 and print STDERR __LINE__, ": $copy: unassigned\n" if $cn_re && $copy =~ $cn_re; goto set_1252 if $cn_re && $copy =~ $cn_re; DEBUG > 8 and print STDERR __LINE__, ": $copy: deprecated\n" if $copy =~ $deprecated_re; goto set_1252 if $copy =~ $deprecated_re; # Nor are rare code points. But this is hard to determine. khw # believes that IPA characters and the modifier letters are unlikely to # be in pod (and certainly very unlikely to be the in the first line in # the pod containing non-ASCII) DEBUG > 8 and print STDERR __LINE__, ": $copy: rare\n" if $copy =~ $rare_blocks_re; goto set_1252 if $rare_blocks_re && $copy =~ $rare_blocks_re; # The first Unicode version included essentially every Latin character # in modern usage. So, a Latin character not in the first release will # unlikely be in pod. DEBUG > 8 and print STDERR __LINE__, ": $copy: later_latin\n" if $later_latin_re && $copy =~ $later_latin_re; goto set_1252 if $later_latin_re && $copy =~ $later_latin_re; # On perls that handle script runs, if the UTF-8 interpretation yields # a single script, we guess UTF-8, otherwise just having a mixture of # scripts is suspicious, so guess CP1252. We first strip off, as best # we can, the ASCII characters that look like they are pod directives, # as these would always show as mixed with non-Latin text. $copy =~ s/$pod_chars_re//g; if ($script_run_re) { goto set_utf8 if $copy =~ $script_run_re; DEBUG > 8 and print STDERR __LINE__, ": not script run\n"; goto set_1252; } # Even without script runs, but on recent enough perls and Unicodes, we # can check if there is a mixture of both Latin and non-Latin. Again, # having a mixture of scripts is suspicious, so assume CP1252 # If it's all non-Latin, there is no CP1252, as that is Latin # characters and punct, etc. DEBUG > 8 and print STDERR __LINE__, ": $copy: not latin\n" if $copy !~ $latin_re; goto set_utf8 if $copy !~ $latin_re; DEBUG > 8 and print STDERR __LINE__, ": $copy: all latin\n" if $copy =~ $every_char_is_latin_re; goto set_utf8 if $copy =~ $every_char_is_latin_re; DEBUG > 8 and print STDERR __LINE__, ": $copy: mixed\n"; set_1252: DEBUG > 9 and print STDERR __LINE__, ": $copy: is 1252\n"; $encoding = 'CP1252'; goto done_set; set_utf8: DEBUG > 9 and print STDERR __LINE__, ": $copy: is UTF-8\n"; $encoding = 'UTF-8'; done_set: $self->_handle_encoding_line( "=encoding $encoding" ); delete $self->{'_processed_encoding'}; $self->{'_transcoder'} && $self->{'_transcoder'}->($line); my ($word) = $line =~ /(\S$non_ascii_re\S)/; $self->whine( $self->{'line_count'}, "Non-ASCII character seen before =encoding in '$word'. Assuming $encoding" ); } DEBUG > 5 and print STDERR "# Parsing line: [$line]\n"; if(!$self->{'in_pod'}) { if($line =~ m/^=([a-zA-Z][a-zA-Z0-9])(?:\s\|$)/s) { if($1 eq 'cut') { $self->scream( $self->{'line_count'}, "=cut found outside a pod block. Skipping to next block." ); ## Before there were errata sections in the world, it was ## least-pessimal to abort processing the file. But now we can ## just barrel on thru (but still not start a pod block). #splice @_; #push @_, undef; next; } else { $self->{'in_pod'} = $self->{'start_of_pod_block'} = $self->{'last_was_blank'} = 1; # And fall thru to the pod-mode block further down } } else { DEBUG > 5 and print STDERR "# It's a code-line.\n"; $code_handler->(map $_, $line, $self->{'line_count'}, $self) if $code_handler; # Note: this may cause code to be processed out of order relative # to pods, but in order relative to cuts. # Note also that we haven't yet applied the transcoding to $line # by time we call $code_handler! if( $line =~ m/^#\sline\s+(\d+)\s(?:\s"([^"]+)")?\s$/ ) { # That RE is from perlsyn, section "Plain Old Comments (Not!)", #$fname = $2 if defined $2; #DEBUG > 1 and defined $2 and print STDERR "# Setting fname to \"$fname\"\n"; DEBUG > 1 and print STDERR "# Setting nextline to $1\n"; $self->{'line_count'} = $1 - 1; } next; } } # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . # Else we're in pod mode: # Apply any necessary transcoding: $self->{'_transcoder'} && $self->{'_transcoder'}->($line); # HERE WE CATCH =encoding EARLY! if( $line =~ m/^=encoding\s+\S+\s$/s ) { next if $self->parse_characters; # Ignore this line $line = $self->_handle_encoding_line( $line ); } if($line =~ m/^=cut/s) { # here ends the pod block, and therefore the previous pod para DEBUG > 1 and print STDERR "Noting =cut at line ${$self}{'line_count'}\n"; $self->{'in_pod'} = 0; # ++$self->{'pod_para_count'}; $self->_ponder_paragraph_buffer(); # by now it's safe to consider the previous paragraph as done. DEBUG > 6 and print STDERR "Processing any cut handler, line ${$self}{'line_count'}\n"; $cut_handler->(map $_, $line, $self->{'line_count'}, $self) if $cut_handler; # TODO: add to docs: Note: this may cause cuts to be processed out # of order relative to pods, but in order relative to code. } elsif($line =~ m/^(\s)$/s) { # it's a blank line if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line $wl_handler->(map $_, $line, $self->{'line_count'}, $self) if $wl_handler; } if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { DEBUG > 1 and print STDERR "Saving blank line at line ${$self}{'line_count'}\n"; push @{$paras->[-1]}, $line; } # otherwise it's not interesting if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) { DEBUG > 1 and print STDERR "Noting para ends with blank line at ${$self}{'line_count'}\n"; } $self->{'last_was_blank'} = 1; } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para... if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9])(\s+\|$)(.)/s) { # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS my $new = [$1, {'start_line' => $self->{'line_count'}}, $3]; $new->[1]{'~orig_spacer'} = $2 if $2 && $2 ne " "; # Note that in "=head1 foo", the WS is lost. # Example: ['=head1', {'start_line' => 123}, ' foo'] ++$self->{'pod_para_count'}; $self->_ponder_paragraph_buffer(); # by now it's safe to consider the previous paragraph as done. push @$paras, $new; # the new incipient paragraph DEBUG > 1 and print STDERR "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n"; } elsif($line =~ m/^\s/s) { if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { DEBUG > 1 and print STDERR "Resuming verbatim para at line ${$self}{'line_count'}\n"; push @{$paras->[-1]}, $line; } else { ++$self->{'pod_para_count'}; $self->_ponder_paragraph_buffer(); # by now it's safe to consider the previous paragraph as done. DEBUG > 1 and print STDERR "Starting verbatim para at line ${$self}{'line_count'}\n"; push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line]; } } else { ++$self->{'pod_para_count'}; $self->_ponder_paragraph_buffer(); # by now it's safe to consider the previous paragraph as done. push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line]; DEBUG > 1 and print STDERR "Starting plain para at line ${$self}{'line_count'}\n"; } $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; } else { # It's a non-blank line /continuing/ the current para if(@$paras) { DEBUG > 2 and print STDERR "Line ${$self}{'line_count'} continues current paragraph\n"; push @{$paras->[-1]}, $line; } else { # Unexpected case! die "Continuing a paragraph but \@\$paras is empty?"; } $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; } } # ends the big while loop DEBUG > 1 and print STDERR (pretty(@$paras), "\n"); return $self; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ sub _handle_encoding_line { my($self, $line) = @_; return if $self->parse_characters; # The point of this routine is to set $self->{'_transcoder'} as indicated. return $line unless $line =~ m/^=encoding\s+(\S+)\s$/s; DEBUG > 1 and print STDERR "Found an encoding line \"=encoding $1\"\n"; my $e = $1; my $orig = $e; push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig"; my $enc_error; # Cf. perldoc Encode and perldoc Encode::Supported require Pod::Simple::Transcode; if( $self->{'encoding'} ) { my $norm_current = $self->{'encoding'}; my $norm_e = $e; foreach my $that ($norm_current, $norm_e) { $that = lc($that); $that =~ s/[-_]//g; } if($norm_current eq $norm_e) { DEBUG > 1 and print STDERR "The '=encoding $orig' line is ", "redundant. ($norm_current eq $norm_e). Ignoring.\n"; $enc_error = ''; # But that doesn't necessarily mean that the earlier one went okay } else { $enc_error = "Encoding is already set to " . $self->{'encoding'}; DEBUG > 1 and print STDERR $enc_error; } } elsif ( # OK, let's turn on the encoding do { DEBUG > 1 and print STDERR " Setting encoding to $e\n"; $self->{'encoding'} = $e; 1; } and $e eq 'HACKRAW' ) { DEBUG and print STDERR " Putting in HACKRAW (no-op) encoding mode.\n"; } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) { die($enc_error = "WHAT? _transcoder is already set?!") if $self->{'_transcoder'}; # should never happen require Pod::Simple::Transcode; $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e); eval { my @x = ('', "abc", "123"); $self->{'_transcoder'}->(@x); }; $@ && die( $enc_error = "Really unexpected error setting up encoding $e: $@\nAborting" ); $self->{'detected_encoding'} = $e; } else { my @supported = Pod::Simple::Transcode::->all_encodings; # Note unsupported, and complain DEBUG and print STDERR " Encoding [$e] is unsupported.", "\nSupporteds: @supported\n"; my $suggestion = ''; # Look for a near match: my $norm = lc($e); $norm =~ tr[-_][]d; my $n; foreach my $enc (@supported) { $n = lc($enc); $n =~ tr[-_][]d; next unless $n eq $norm; $suggestion = " (Maybe \"$e\" should be \"$enc\"?)"; last; } my $encmodver = Pod::Simple::Transcode::->encmodver; $enc_error = join '' => "This document probably does not appear as it should, because its ", "\"=encoding $e\" line calls for an unsupported encoding.", $suggestion, " [$encmodver\'s supported encodings are: @supported]" ; $self->scream( $self->{'line_count'}, $enc_error ); } push @{ $self->{'encoding_command_statuses'} }, $enc_error; if (defined($self->{'_processed_encoding'})) { # Double declaration. $self->scream( $self->{'line_count'}, 'Cannot have multiple =encoding directives'); } $self->{'_processed_encoding'} = $orig; return $line; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sub _handle_encoding_second_level { # By time this is called, the encoding (if well formed) will already # have been acted on. my($self, $para) = @_; my @x = @$para; my $content = join ' ', splice @x, 2; $content =~ s/^\s+//s; $content =~ s/\s+$//s; DEBUG > 2 and print STDERR "Ogling encoding directive: =encoding $content\n"; if (defined($self->{'_processed_encoding'})) { #if($content ne $self->{'_processed_encoding'}) { # Could it happen? #} delete $self->{'_processed_encoding'}; # It's already been handled. Check for errors. if(! $self->{'encoding_command_statuses'} ) { DEBUG > 2 and print STDERR " CRAZY ERROR: It wasn't really handled?!\n"; } elsif( $self->{'encoding_command_statuses'}[-1] ) { $self->whine( $para->[1]{'start_line'}, sprintf "Couldn't do %s: %s", $self->{'encoding_command_reqs' }[-1], $self->{'encoding_command_statuses'}[-1], ); } else { DEBUG > 2 and print STDERR " (Yup, it was successfully handled already.)\n"; } } else { # Otherwise it's a syntax error $self->whine( $para->[1]{'start_line'}, "Invalid =encoding syntax: $content" ); } return; } #~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~` { my $m = -321; # magic line number sub _gen_errata { my $self = $_[0]; # Return 0 or more fake-o paragraphs explaining the accumulated # errors on this document. return() unless $self->{'errata'} and keys %{$self->{'errata'}}; my @out; foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) { push @out, ['=item', {'start_line' => $m}, "Around line $line:"], map( ['~Para', {'start_line' => $m, '~cooked' => 1}, #['~Top', {'start_line' => $m}, $_ #] ], @{$self->{'errata'}{$line}} ) ; } # TODO: report of unknown entities? unrenderable characters? unshift @out, ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'], ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1}, "Hey! ", ['B', {}, 'The above document had some coding errors, which are explained below:' ] ], ['=over', {'start_line' => $m, 'errata' => 1}, ''], ; push @out, ['=back', {'start_line' => $m, 'errata' => 1}, ''], ; DEBUG and print STDERR "\n<<\n", pretty(\@out), "\n>>\n\n"; return @out; } } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ ############################################################################## ## ## stop reading now stop reading now stop reading now stop reading now stop ## ## HERE IT BECOMES REALLY SCARY ## ## stop reading now stop reading now stop reading now stop reading now stop ## ############################################################################## sub _ponder_paragraph_buffer { # Para-token types as found in the buffer. # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end, # =over, =back, =item # and the null =pod (to be complained about if over one line) # # "~data" paragraphs are something we generate at this level, depending on # a currently open =over region # Events fired: Begin and end for: # directivename (like head1 .. head4), item, extend, # for (from =begin...=end, =for), # over-bullet, over-number, over-text, over-block, # item-bullet, item-number, item-text, # Document, # Data, Para, Verbatim # B, C, longdirname (TODO -- wha?), etc. for all directives # my $self = $_[0]; my $paras; return unless @{$paras = $self->{'paras'}}; my $curr_open = ($self->{'curr_open'} \|\|= []); my $scratch; DEBUG > 10 and print STDERR "# Paragraph buffer: <<", pretty($paras), ">>\n"; # We have something in our buffer. So apparently the document has started. unless($self->{'doc_has_started'}) { $self->{'doc_has_started'} = 1; my $starting_contentless; $starting_contentless = ( !@$curr_open and @$paras and ! grep $_->[0] ne '~end', @$paras # i.e., if the paras is all ~ends ) ; DEBUG and print STDERR "# Starting ", $starting_contentless ? 'contentless' : 'contentful', " document\n" ; $self->_handle_element_start( ($scratch = 'Document'), { 'start_line' => $paras->[0][1]{'start_line'}, $starting_contentless ? ( 'contentless' => 1 ) : (), }, ); } my($para, $para_type); while(@$paras) { # If a directive, assume it's legal; subtract below if found not to be $seen_legal_directive++ if $paras->[0][0] =~ /^=/; last if @$paras == 1 and ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '=item' or ($paras->[0][0] eq '~Verbatim' and $self->{'in_pod'})); # Those're the three kinds of paragraphs that require lookahead. # Actually, an "=item Foo" inside an <over type=text> region # and any =item inside an <over type=block> region (rare) # don't require any lookahead, but all others (bullets # and numbers) do. # The verbatim is different from the other two, because those might be # like: # # =item # ... # =cut # ... # =item # # The =cut here finishes the paragraph but doesn't terminate the =over # they should be in. (khw apologizes that he didn't comment at the time # why the 'in_pod' works, and no longer remembers why, and doesn't think # it is currently worth the effort to re-figure it out.) # TODO: whinge about many kinds of directives in non-resolving =for regions? # TODO: many? like what? =head1 etc? $para = shift @$paras; $para_type = $para->[0]; DEBUG > 1 and print STDERR "Pondering a $para_type paragraph, given the stack: (", $self->_dump_curr_open(), ")\n"; if($para_type eq '=for') { next if $self->_ponder_for($para,$curr_open,$paras); } elsif($para_type eq '=begin') { next if $self->_ponder_begin($para,$curr_open,$paras); } elsif($para_type eq '=end') { next if $self->_ponder_end($para,$curr_open,$paras); } elsif($para_type eq '~end') { # The virtual end-document signal next if $self->_ponder_doc_end($para,$curr_open,$paras); } # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ if(grep $_->[1]{'~ignore'}, @$curr_open) { DEBUG > 1 and print STDERR "Skipping $para_type paragraph because in ignore mode.\n"; next; } #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ if($para_type eq '=pod') { $self->_ponder_pod($para,$curr_open,$paras); } elsif($para_type eq '=over') { next if $self->_ponder_over($para,$curr_open,$paras); } elsif($para_type eq '=back') { next if $self->_ponder_back($para,$curr_open,$paras); } else { # All non-magical codes!!! # Here we start using $para_type for our own twisted purposes, to # mean how it should get treated, not as what the element name # should be. DEBUG > 1 and print STDERR "Pondering non-magical $para_type\n"; my $i; # Enforce some =headN discipline if($para_type =~ m/^=head\d$/s and ! $self->{'accept_heads_anywhere'} and @$curr_open and $curr_open->[-1][0] eq '=over' ) { DEBUG > 2 and print STDERR "'=$para_type' inside an '=over'!\n"; $self->whine( $para->[1]{'start_line'}, "You forgot a '=back' before '$para_type'" ); unshift @$paras, ['=back', {}, ''], $para; # close the =over next; } if($para_type eq '=item') { my $over; unless(@$curr_open and $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { $self->whine( $para->[1]{'start_line'}, "'=item' outside of any '=over'" ); unshift @$paras, ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], $para ; next; } my $over_type = $over->[1]{'~type'}; if(!$over_type) { # Shouldn't happen1 die "Typeless over in stack, starting at line " . $over->[1]{'start_line'}; } elsif($over_type eq 'block') { unless($curr_open->[-1][1]{'~bitched_about'}) { $curr_open->[-1][1]{'~bitched_about'} = 1; $self->whine( $curr_open->[-1][1]{'start_line'}, "You can't have =items (as at line " . $para->[1]{'start_line'} . ") unless the first thing after the =over is an =item" ); } # Just turn it into a paragraph and reconsider it $para->[0] = '~Para'; unshift @$paras, $para; next; } elsif($over_type eq 'text') { my $item_type = $self->_get_item_type($para); # That kills the content of the item if it's a number or bullet. DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; if($item_type eq 'text') { # Nothing special needs doing for 'text' } elsif($item_type eq 'number' or $item_type eq 'bullet') { $self->whine( $para->[1]{'start_line'}, "Expected text after =item, not a $item_type" ); # Undo our clobbering: push @$para, $para->[1]{'~orig_content'}; delete $para->[1]{'number'}; # Only a PROPER item-number element is allowed # to have a number attribute. } else { die "Unhandled item type $item_type"; # should never happen } # =item-text thingies don't need any assimilation, it seems. } elsif($over_type eq 'number') { my $item_type = $self->_get_item_type($para); # That kills the content of the item if it's a number or bullet. DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; if($item_type eq 'bullet') { # Hm, it's not numeric. Correct for this. $para->[1]{'number'} = $expected_value; $self->whine( $para->[1]{'start_line'}, "Expected '=item $expected_value'" ); push @$para, $para->[1]{'~orig_content'}; # restore the bullet, blocking the assimilation of next para } elsif($item_type eq 'text') { # Hm, it's not numeric. Correct for this. $para->[1]{'number'} = $expected_value; $self->whine( $para->[1]{'start_line'}, "Expected '=item $expected_value'" ); # Text content will still be there and will block next ~Para } elsif($item_type ne 'number') { die "Unknown item type $item_type"; # should never happen } elsif($expected_value == $para->[1]{'number'}) { DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n"; } else { DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'}, " instead of the expected value of $expected_value\n"; $self->whine( $para->[1]{'start_line'}, "You have '=item " . $para->[1]{'number'} . "' instead of the expected '=item $expected_value'" ); $para->[1]{'number'} = $expected_value; # correcting!! } if(@$para == 2) { # For the cases where we /didn't/ push to @$para if($paras->[0][0] eq '~Para') { DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; push @$para, splice @{shift @$paras},2; } else { DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; push @$para, ''; # Just so it's not contentless } } } elsif($over_type eq 'bullet') { my $item_type = $self->_get_item_type($para); # That kills the content of the item if it's a number or bullet. DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; if($item_type eq 'bullet') { # as expected! if( $para->[1]{'~_freaky_para_hack'} ) { DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n"; push @$para, $para->[1]{'~_freaky_para_hack'}; } } elsif($item_type eq 'number') { $self->whine( $para->[1]{'start_line'}, "Expected '=item '" ); push @$para, $para->[1]{'~orig_content'}; # and block assimilation of the next paragraph delete $para->[1]{'number'}; # Only a PROPER item-number element is allowed # to have a number attribute. } elsif($item_type eq 'text') { $self->whine( $para->[1]{'start_line'}, "Expected '=item '" ); # But doesn't need processing. But it'll block assimilation # of the next para. } else { die "Unhandled item type $item_type"; # should never happen } if(@$para == 2) { # For the cases where we /didn't/ push to @$para if($paras->[0][0] eq '~Para') { DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; push @$para, splice @{shift @$paras},2; } else { DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; push @$para, ''; # Just so it's not contentless } } } else { die "Unhandled =over type \"$over_type\"?"; # Shouldn't happen! } $para_type = 'Plain'; $para->[0] .= '-' . $over_type; # Whew. Now fall thru and process it. } elsif($para_type eq '=extend') { # Well, might as well implement it here. $self->_ponder_extend($para); next; # and skip } elsif($para_type eq '=encoding') { # Not actually acted on here, but we catch errors here. $self->_handle_encoding_second_level($para); next unless $self->keep_encoding_directive; $para_type = 'Plain'; } elsif($para_type eq '~Verbatim') { $para->[0] = 'Verbatim'; $para_type = '?Verbatim'; } elsif($para_type eq '~Para') { $para->[0] = 'Para'; $para_type = '?Plain'; } elsif($para_type eq 'Data') { $para->[0] = 'Data'; $para_type = '?Data'; } elsif( $para_type =~ s/^=//s and defined( $para_type = $self->{'accept_directives'}{$para_type} ) ) { DEBUG > 1 and print STDERR " Pondering known directive ${$para}[0] as $para_type\n"; } else { # An unknown directive! $seen_legal_directive--; DEBUG > 1 and printf STDERR "Unhandled directive %s (Handled: %s)\n", $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} ) ; $self->whine( $para->[1]{'start_line'}, "Unknown directive: $para->[0]" ); # And maybe treat it as text instead of just letting it go? next; } if($para_type =~ s/^\?//s) { if(! @$curr_open) { # usual case DEBUG and print STDERR "Treating $para_type paragraph as such because stack is empty.\n"; } else { my @fors = grep $_->[0] eq '=for', @$curr_open; DEBUG > 1 and print STDERR "Containing fors: ", join(',', map $_->[1]{'target'}, @fors), "\n"; if(! @fors) { DEBUG and print STDERR "Treating $para_type paragraph as such because stack has no =for's\n"; #} elsif(grep $_->[1]{'~resolve'}, @fors) { #} elsif(not grep !$_->[1]{'~resolve'}, @fors) { } elsif( $fors[-1][1]{'~resolve'} ) { # Look to the immediately containing for if($para_type eq 'Data') { DEBUG and print STDERR "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; $para->[0] = 'Para'; $para_type = 'Plain'; } else { DEBUG and print STDERR "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; } } else { DEBUG and print STDERR "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n"; $para->[0] = $para_type = 'Data'; } } } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if($para_type eq 'Plain') { $self->_ponder_Plain($para); } elsif($para_type eq 'Verbatim') { $self->_ponder_Verbatim($para); } elsif($para_type eq 'Data') { $self->_ponder_Data($para); } else { die "\$para type is $para_type -- how did that happen?"; # Shouldn't happen. } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $para->[0] =~ s/^[~=]//s; DEBUG and print STDERR "\n", pretty($para), "\n"; # traverse the treelet (which might well be just one string scalar) $self->{'content_seen'} \|\|= 1 if $seen_legal_directive && ! $self->{'~tried_gen_errata'}; $self->_traverse_treelet_bit(@$para); } } return; } ########################################################################### # The sub-ponderers... sub _ponder_for { my ($self,$para,$curr_open,$paras) = @_; # Fake it out as a begin/end my $target; if(grep $_->[1]{'~ignore'}, @$curr_open) { DEBUG > 1 and print STDERR "Ignoring ignorable =for\n"; return 1; } for(my $i = 2; $i < @$para; ++$i) { if($para->[$i] =~ s/^\s(\S+)\s//s) { $target = $1; last; } } unless(defined $target) { $self->whine( $para->[1]{'start_line'}, "=for without a target?" ); return 1; } DEBUG > 1 and print STDERR "Faking out a =for $target as a =begin $target / =end $target\n"; $para->[0] = 'Data'; unshift @$paras, ['=begin', {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, $target, ], $para, ['=end', {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, $target, ], ; return 1; } sub _ponder_begin { my ($self,$para,$curr_open,$paras) = @_; my $content = join ' ', splice @$para, 2; $content =~ s/^\s+//s; $content =~ s/\s+$//s; unless(length($content)) { $self->whine( $para->[1]{'start_line'}, "=begin without a target?" ); DEBUG and print STDERR "Ignoring targetless =begin\n"; return 1; } my ($target, $title) = $content =~ m/^(\S+)\s(.)$/; $para->[1]{'title'} = $title if ($title); $para->[1]{'target'} = $target; # without any ':' $content = $target; # strip off the title $content =~ s/^:!/!:/s; my $neg; # whether this is a negation-match $neg = 1 if $content =~ s/^!//s; my $to_resolve; # whether to process formatting codes $to_resolve = 1 if $content =~ s/^://s; my $dont_ignore; # whether this target matches us foreach my $target_name ( split(',', $content, -1), $neg ? () : '' ) { DEBUG > 2 and print STDERR " Considering whether =begin $content matches $target_name\n"; next unless $self->{'accept_targets'}{$target_name}; DEBUG > 2 and print STDERR " It DOES match the acceptable target $target_name!\n"; $to_resolve = 1 if $self->{'accept_targets'}{$target_name} eq 'force_resolve'; $dont_ignore = 1; $para->[1]{'target_matching'} = $target_name; last; # stop looking at other target names } if($neg) { if( $dont_ignore ) { $dont_ignore = ''; delete $para->[1]{'target_matching'}; DEBUG > 2 and print STDERR " But the leading ! means that this is a NON-match!\n"; } else { $dont_ignore = 1; $para->[1]{'target_matching'} = '!'; DEBUG > 2 and print STDERR " But the leading ! means that this IS a match!\n"; } } $para->[0] = '=for'; # Just what we happen to call these, internally $para->[1]{'~really'} \|\|= '=begin'; $para->[1]{'~ignore'} = (! $dont_ignore) \|\| 0; $para->[1]{'~resolve'} = $to_resolve \|\| 0; DEBUG > 1 and print STDERR " Making note to ", $dont_ignore ? 'not ' : '', "ignore contents of this region\n"; DEBUG > 1 and $dont_ignore and print STDERR " Making note to treat contents as ", ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n"; DEBUG > 1 and print STDERR " (Stack now: ", $self->_dump_curr_open(), ")\n"; push @$curr_open, $para; if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) { DEBUG > 1 and print STDERR "Ignoring ignorable =begin\n"; } else { $self->{'content_seen'} \|\|= 1 unless $self->{'~tried_gen_errata'}; $self->_handle_element_start((my $scratch='for'), $para->[1]); } return 1; } sub _ponder_end { my ($self,$para,$curr_open,$paras) = @_; my $content = join ' ', splice @$para, 2; $content =~ s/^\s+//s; $content =~ s/\s+$//s; DEBUG and print STDERR "Ogling '=end $content' directive\n"; unless(length($content)) { $self->whine( $para->[1]{'start_line'}, "'=end' without a target?" . ( ( @$curr_open and $curr_open->[-1][0] eq '=for' ) ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' ) : '' ) ); DEBUG and print STDERR "Ignoring targetless =end\n"; return 1; } unless($content =~ m/^\S+$/) { # i.e., unless it's one word $self->whine( $para->[1]{'start_line'}, "'=end $content' is invalid. (Stack: " . $self->_dump_curr_open() . ')' ); DEBUG and print STDERR "Ignoring mistargetted =end $content\n"; return 1; } unless(@$curr_open and $curr_open->[-1][0] eq '=for') { $self->whine( $para->[1]{'start_line'}, "=end $content without matching =begin. (Stack: " . $self->_dump_curr_open() . ')' ); DEBUG and print STDERR "Ignoring mistargetted =end $content\n"; return 1; } unless($content eq $curr_open->[-1][1]{'target'}) { $self->whine( $para->[1]{'start_line'}, "=end $content doesn't match =begin " . $curr_open->[-1][1]{'target'} . ". (Stack: " . $self->_dump_curr_open() . ')' ); DEBUG and print STDERR "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n"; return 1; } # Else it's okay to close... if(grep $_->[1]{'~ignore'}, @$curr_open) { DEBUG > 1 and print STDERR "Not firing any event for this =end $content because in an ignored region\n"; # And that may be because of this to-be-closed =for region, or some # other one, but it doesn't matter. } else { $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'}; # what's that for? $self->{'content_seen'} \|\|= 1 unless $self->{'~tried_gen_errata'}; $self->_handle_element_end( my $scratch = 'for', $para->[1]); } DEBUG > 1 and print STDERR "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n"; pop @$curr_open; return 1; } sub _ponder_doc_end { my ($self,$para,$curr_open,$paras) = @_; if(@$curr_open) { # Deal with things left open DEBUG and print STDERR "Stack is nonempty at end-document: (", $self->_dump_curr_open(), ")\n"; DEBUG > 9 and print STDERR "Stack: ", pretty($curr_open), "\n"; unshift @$paras, $self->_closers_for_all_curr_open; # Make sure there is exactly one ~end in the parastack, at the end: @$paras = grep $_->[0] ne '~end', @$paras; push @$paras, $para, $para; # We need two -- once for the next cycle where we # generate errata, and then another to be at the end # when that loop back around to process the errata. return 1; } else { DEBUG and print STDERR "Okay, stack is empty now.\n"; } # Try generating errata section, if applicable unless($self->{'~tried_gen_errata'}) { $self->{'~tried_gen_errata'} = 1; my @extras = $self->_gen_errata(); if(@extras) { unshift @$paras, @extras; DEBUG and print STDERR "Generated errata... relooping...\n"; return 1; # I.e., loop around again to process these fake-o paragraphs } } splice @$paras; # Well, that's that for this paragraph buffer. DEBUG and print STDERR "Throwing end-document event.\n"; $self->_handle_element_end( my $scratch = 'Document' ); return 1; # Hasta la byebye } sub _ponder_pod { my ($self,$para,$curr_open,$paras) = @_; $self->whine( $para->[1]{'start_line'}, "=pod directives shouldn't be over one line long! Ignoring all " . (@$para - 2) . " lines of content" ) if @$para > 3; # Content ignored unless 'pod_handler' is set if (my $pod_handler = $self->{'pod_handler'}) { my ($line_num, $line) = map $_, $para->[1]{'start_line'}, $para->[2]; $line = $line eq '' ? "=pod" : "=pod $line"; # imitate cut_handler output $pod_handler->($line, $line_num, $self); } # The surrounding methods set content_seen, so let us remain consistent. # I do not know why it was not here before -- should it not be here? # $self->{'content_seen'} \|\|= 1 unless $self->{'~tried_gen_errata'}; return; } sub _ponder_over { my ($self,$para,$curr_open,$paras) = @_; return 1 unless @$paras; my $list_type; if($paras->[0][0] eq '=item') { # most common case $list_type = $self->_get_initial_item_type($paras->[0]); } elsif($paras->[0][0] eq '=back') { # Ignore empty lists by default if ($self->{'parse_empty_lists'}) { $list_type = 'empty'; } else { shift @$paras; return 1; } } elsif($paras->[0][0] eq '~end') { $self->whine( $para->[1]{'start_line'}, "=over is the last thing in the document?!" ); return 1; # But feh, ignore it. } else { $list_type = 'block'; } $para->[1]{'~type'} = $list_type; push @$curr_open, $para; # yes, we reuse the paragraph as a stack item my $content = join ' ', splice @$para, 2; $para->[1]{'~orig_content'} = $content; my $overness; if($content =~ m/^\s$/s) { $para->[1]{'indent'} = 4; } elsif($content =~ m/^\s((?:\d\.)?\d+)\s$/s) { no integer; $para->[1]{'indent'} = $1; if($1 == 0) { $self->whine( $para->[1]{'start_line'}, "Can't have a 0 in =over $content" ); $para->[1]{'indent'} = 4; } } else { $self->whine( $para->[1]{'start_line'}, "=over should be: '=over' or '=over positive_number'" ); $para->[1]{'indent'} = 4; } DEBUG > 1 and print STDERR "=over found of type $list_type\n"; $self->{'content_seen'} \|\|= 1 unless $self->{'~tried_gen_errata'}; $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]); return; } sub _ponder_back { my ($self,$para,$curr_open,$paras) = @_; # TODO: fire off </item-number> or </item-bullet> or </item-text> ?? my $content = join ' ', splice @$para, 2; if($content =~ m/\S/) { $self->whine( $para->[1]{'start_line'}, "=back doesn't take any parameters, but you said =back $content" ); } if(@$curr_open and $curr_open->[-1][0] eq '=over') { DEBUG > 1 and print STDERR "=back happily closes matching =over\n"; # Expected case: we're closing the most recently opened thing #my $over = pop @$curr_open; $self->{'content_seen'} \|\|= 1 unless $self->{'~tried_gen_errata'}; $self->_handle_element_end( my $scratch = 'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1] ); } else { DEBUG > 1 and print STDERR "=back found without a matching =over. Stack: (", join(', ', map $_->[0], @$curr_open), ").\n"; $self->whine( $para->[1]{'start_line'}, '=back without =over' ); return 1; # and ignore it } } sub _ponder_item { my ($self,$para,$curr_open,$paras) = @_; my $over; unless(@$curr_open and $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { $self->whine( $para->[1]{'start_line'}, "'=item' outside of any '=over'" ); unshift @$paras, ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], $para ; return 1; } my $over_type = $over->[1]{'~type'}; if(!$over_type) { # Shouldn't happen1 die "Typeless over in stack, starting at line " . $over->[1]{'start_line'}; } elsif($over_type eq 'block') { unless($curr_open->[-1][1]{'~bitched_about'}) { $curr_open->[-1][1]{'~bitched_about'} = 1; $self->whine( $curr_open->[-1][1]{'start_line'}, "You can't have =items (as at line " . $para->[1]{'start_line'} . ") unless the first thing after the =over is an =item" ); } # Just turn it into a paragraph and reconsider it $para->[0] = '~Para'; unshift @$paras, $para; return 1; } elsif($over_type eq 'text') { my $item_type = $self->_get_item_type($para); # That kills the content of the item if it's a number or bullet. DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; if($item_type eq 'text') { # Nothing special needs doing for 'text' } elsif($item_type eq 'number' or $item_type eq 'bullet') { $self->whine( $para->[1]{'start_line'}, "Expected text after =item, not a $item_type" ); # Undo our clobbering: push @$para, $para->[1]{'~orig_content'}; delete $para->[1]{'number'}; # Only a PROPER item-number element is allowed # to have a number attribute. } else { die "Unhandled item type $item_type"; # should never happen } # =item-text thingies don't need any assimilation, it seems. } elsif($over_type eq 'number') { my $item_type = $self->_get_item_type($para); # That kills the content of the item if it's a number or bullet. DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; if($item_type eq 'bullet') { # Hm, it's not numeric. Correct for this. $para->[1]{'number'} = $expected_value; $self->whine( $para->[1]{'start_line'}, "Expected '=item $expected_value'" ); push @$para, $para->[1]{'~orig_content'}; # restore the bullet, blocking the assimilation of next para } elsif($item_type eq 'text') { # Hm, it's not numeric. Correct for this. $para->[1]{'number'} = $expected_value; $self->whine( $para->[1]{'start_line'}, "Expected '=item $expected_value'" ); # Text content will still be there and will block next ~Para } elsif($item_type ne 'number') { die "Unknown item type $item_type"; # should never happen } elsif($expected_value == $para->[1]{'number'}) { DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n"; } else { DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'}, " instead of the expected value of $expected_value\n"; $self->whine( $para->[1]{'start_line'}, "You have '=item " . $para->[1]{'number'} . "' instead of the expected '=item $expected_value'" ); $para->[1]{'number'} = $expected_value; # correcting!! } if(@$para == 2) { # For the cases where we /didn't/ push to @$para if($paras->[0][0] eq '~Para') { DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; push @$para, splice @{shift @$paras},2; } else { DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; push @$para, ''; # Just so it's not contentless } } } elsif($over_type eq 'bullet') { my $item_type = $self->_get_item_type($para); # That kills the content of the item if it's a number or bullet. DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n"; if($item_type eq 'bullet') { # as expected! if( $para->[1]{'~_freaky_para_hack'} ) { DEBUG and print STDERR "Accomodating '=item Foo' tolerance hack.\n"; push @$para, $para->[1]{'~_freaky_para_hack'}; } } elsif($item_type eq 'number') { $self->whine( $para->[1]{'start_line'}, "Expected '=item '" ); push @$para, $para->[1]{'~orig_content'}; # and block assimilation of the next paragraph delete $para->[1]{'number'}; # Only a PROPER item-number element is allowed # to have a number attribute. } elsif($item_type eq 'text') { $self->whine( $para->[1]{'start_line'}, "Expected '=item '" ); # But doesn't need processing. But it'll block assimilation # of the next para. } else { die "Unhandled item type $item_type"; # should never happen } if(@$para == 2) { # For the cases where we /didn't/ push to @$para if($paras->[0][0] eq '~Para') { DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n"; push @$para, splice @{shift @$paras},2; } else { DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n"; push @$para, ''; # Just so it's not contentless } } } else { die "Unhandled =over type \"$over_type\"?"; # Shouldn't happen! } $para->[0] .= '-' . $over_type; return; } sub _ponder_Plain { my ($self,$para) = @_; DEBUG and print STDERR " giving plain treatment...\n"; unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' ) or $para->[1]{'~cooked'} ) { push @$para, @{$self->_make_treelet( join("\n", splice(@$para, 2)), $para->[1]{'start_line'} )}; } # Empty paragraphs don't need a treelet for any reason I can see. # And precooked paragraphs already have a treelet. return; } sub _ponder_Verbatim { my ($self,$para) = @_; DEBUG and print STDERR " giving verbatim treatment...\n"; $para->[1]{'xml:space'} = 'preserve'; unless ($self->{'_output_is_for_JustPod'}) { # Fix illegal settings for expand_verbatim_tabs() # This is because this module doesn't do input error checking, but khw # doesn't want to add yet another instance of that. $self->expand_verbatim_tabs(8) if ! defined $self->expand_verbatim_tabs() \|\| $self->expand_verbatim_tabs() =~ /\D/; my $indent = $self->strip_verbatim_indent; if ($indent && ref $indent eq 'CODE') { my @shifted = (shift @{$para}, shift @{$para}); $indent = $indent->($para); unshift @{$para}, @shifted; } for(my $i = 2; $i < @$para; $i++) { foreach my $line ($para->[$i]) { # just for aliasing # Strip indentation. $line =~ s/^\Q$indent// if $indent; next unless $self->expand_verbatim_tabs; # This is commented out because of github issue #85, and the # current maintainers don't know why it was there in the first # place. #&& !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted}); while( $line =~ # Sort of adapted from Text::Tabs. s/^([^\t])(\t+)/$1.(" " x ((length($2) $self->expand_verbatim_tabs) -(length($1)&7)))/e ) {} # TODO: whinge about (or otherwise treat) unindented or overlong lines } } } # Now the VerbatimFormatted hoodoo... if( $self->{'accept_codes'} and $self->{'accept_codes'}{'VerbatimFormatted'} ) { while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para } # Kill any number of terminal newlines $self->_verbatim_format($para); } elsif ($self->{'codes_in_verbatim'}) { push @$para, @{$self->_make_treelet( join("\n", splice(@$para, 2)), $para->[1]{'start_line'}, $para->[1]{'xml:space'} )}; $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines } else { push @$para, join "\n", splice(@$para, 2) if @$para > 3; $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines } return; } sub _ponder_Data { my ($self,$para) = @_; DEBUG and print STDERR " giving data treatment...\n"; $para->[1]{'xml:space'} = 'preserve'; push @$para, join "\n", splice(@$para, 2) if @$para > 3; return; } ########################################################################### sub _traverse_treelet_bit { # for use only by the routine above my($self, $name) = splice @_,0,2; my $scratch; $self->_handle_element_start(($scratch=$name), shift @_); while (@_) { my $x = shift; if (ref($x)) { &_traverse_treelet_bit($self, @$x); } else { $x .= shift while @_ && !ref($_[0]); $self->_handle_text($x); } } $self->_handle_element_end($scratch=$name); return; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ sub _closers_for_all_curr_open { my $self = $_[0]; my @closers; foreach my $still_open (@{ $self->{'curr_open'} \|\| return }) { my @copy = @$still_open; $copy[1] = {%{ $copy[1] }}; #$copy[1]{'start_line'} = -1; if($copy[0] eq '=for') { $copy[0] = '=end'; } elsif($copy[0] eq '=over') { $self->whine( $still_open->[1]{start_line} , "=over without closing =back" ); $copy[0] = '=back'; } else { die "I don't know how to auto-close an open $copy[0] region"; } unless( @copy > 2 ) { push @copy, $copy[1]{'target'}; $copy[-1] = '' unless defined $copy[-1]; # since =over's don't have targets } $copy[1]{'fake-closer'} = 1; DEBUG and print STDERR "Queuing up fake-o event: ", pretty(\@copy), "\n"; unshift @closers, \@copy; } return @closers; } #-------------------------------------------------------------------------- sub _verbatim_format { my($it, $p) = @_; my $formatting; for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines DEBUG and print STDERR "_verbatim_format appends a newline to $i: $p->[$i]\n"; $p->[$i] .= "\n"; # Unlike with simple Verbatim blocks, we don't end up just doing # a join("\n", ...) on the contents, so we have to append a # newline to every line, and then nix the last one later. } if( DEBUG > 4 ) { print STDERR "<<\n"; for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines print STDERR "_verbatim_format $i: $p->[$i]"; } print STDERR ">>\n"; } for(my $i = $#$p; $i > 2; $i--) { # work backwards over the lines, except the first (#2) #next unless $p->[$i] =~ m{^#:([ \^\/\%])\n?$}s # and $p->[$i-1] !~ m{^#:[ \^\/\%]\n?$}s; # look at a formatty line preceding a nonformatty one DEBUG > 5 and print STDERR "Scrutinizing line $i: $$p[$i]\n"; if($p->[$i] =~ m{^#:([ \^\/\%])\n?$}s) { DEBUG > 5 and print STDERR " It's a formatty line. ", "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n"; if( $p->[$i-1] =~ m{^#:[ \^\/\%]\n?$}s ) { DEBUG > 5 and print STDERR " Previous line is formatty! Skipping this one.\n"; next; } else { DEBUG > 5 and print STDERR " Previous line is non-formatty! Yay!\n"; } } else { DEBUG > 5 and print STDERR " It's not a formatty line. Ignoring\n"; next; } # A formatty line has to have #: in the first two columns, and uses # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic. # Example: # What do you want? i like pie. [or whatever] # #:^^^^^^^^^^^^^^^^^ ///////////// DEBUG > 4 and print STDERR "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n"; $formatting = ' ' . $1; $formatting =~ s/\s+$//s; # nix trailing whitespace unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op splice @$p,$i,1; # remove this line $i--; # don't consider next line next; } if( length($formatting) >= length($p->[$i-1]) ) { $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' '; } else { $formatting .= ' ' x (length($p->[$i-1]) - length($formatting)); } # Make $formatting and the previous line be exactly the same length, # with $formatting having a " " as the last character. DEBUG > 4 and print STDERR "Formatting <$formatting> on <", $p->[$i-1], ">\n"; my @new_line; while( $formatting =~ m{\G(( +)\|(\^+)\|(\/+)\|(\%+))}g ) { #print STDERR "Format matches $1\n"; if($2) { #print STDERR "SKIPPING <$2>\n"; push @new_line, substr($p->[$i-1], pos($formatting)-length($1), length($1)); } else { #print STDERR "SNARING $+\n"; push @new_line, [ ( $3 ? 'VerbatimB' : $4 ? 'VerbatimI' : $5 ? 'VerbatimBI' : die("Should never get called") ), {}, substr($p->[$i-1], pos($formatting)-length($1), length($1)) ]; #print STDERR "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n"; } } my @nixed = splice @$p, $i-1, 2, @new_line; # replace myself and the next line DEBUG > 10 and print STDERR "Nixed count: ", scalar(@nixed), "\n"; DEBUG > 6 and print STDERR "New version of the above line is these tokens (", scalar(@new_line), "):", map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n"; $i--; # So the next line we scrutinize is the line before the one # that we just went and formatted } $p->[0] = 'VerbatimFormatted'; # Collapse adjacent text nodes, just for kicks. for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) { DEBUG > 5 and print STDERR "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n"; $p->[$i] .= splice @$p, $i+1, 1; # merge --$i; # and back up } } # Now look for the last text token, and remove the terminal newline for( my $i = $#$p; $i >= 2; $i-- ) { # work backwards over the tokens, even the first if( !ref($p->[$i]) ) { if($p->[$i] =~ s/\n$//s) { DEBUG > 5 and print STDERR "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n"; } else { DEBUG > 5 and print STDERR "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n"; } last; # we only want the next one } } return; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ sub _treelet_from_formatting_codes { # Given a paragraph, returns a treelet. Full of scary tokenizing code. # Like [ '~Top', {'start_line' => $start_line}, # "I like ", # [ 'B', {}, "pie" ], # "!" # ] # This illustrates the general format of a treelet. It is an array: # [0] is a scalar indicating its type. In the example above, the # types are '~Top' and 'B' # [1] is a hash of various flags about it, possibly empty # [2] - [N] are an ordered list of the subcomponents of the treelet. # Scalars are literal text, refs are sub-treelets, to # arbitrary levels. Stringifying a treelet will recursively # stringify the sub-treelets, concatentating everything # together to form the exact text of the treelet. my($self, $para, $start_line, $preserve_space) = @_; my $treelet = ['~Top', {'start_line' => $start_line},]; unless ($preserve_space \|\| $self->{'preserve_whitespace'}) { $para =~ s/\s+/ /g; # collapse and trim all whitespace first. $para =~ s/ $//; $para =~ s/^ //; } # Only apparent problem the above code is that N<< >> turns into # N<< >>. But then, word wrapping does that too! So don't do that! # As a Start-code is encountered, the number of opening bracket '<' # characters minus 1 is pushed onto @stack (so 0 means a single bracket, # etc). When closing brackets are found in the text, at least this number # (plus the 1) will be required to mean the Start-code is terminated. When # those are found, @stack is popped. my @stack; my @lineage = ($treelet); my $raw = ''; # raw content of L<> fcode before splitting/processing # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed # into just 1 ' '. Is this the regex's doing or 'raw's? Answer is it's # the 'collapse and trim all whitespace first' lines just above. my $inL = 0; DEBUG > 4 and print STDERR "Paragraph:\n$para\n\n"; # Here begins our frightening tokenizer RE. The following regex matches # text in four main parts: # # * Start-codes. The first alternative matches C< or C<<, the latter # followed by some whitespace. $1 will hold the entire start code # (including any space following a multiple-angle-bracket delimiter), # and $2 will hold only the additional brackets past the first in a # multiple-bracket delimiter. length($2) + 1 will be the number of # closing brackets we have to find. # # * Closing brackets. Match some amount of whitespace followed by # multiple close brackets. The logic to see if this closes anything # is down below. Note that in order to parse C<< >> correctly, we # have to use look-behind (?<=\s\s), since the match of the starting # code will have consumed the whitespace. # # * A single closing bracket, to close a simple code like C<>. # # * Something that isn't a start or end code. We have to be careful # about accepting whitespace, since perlpodspec says that any whitespace # before a multiple-bracket closing delimiter should be ignored. # while($para =~ m/\G (?: # Match starting codes, including the whitespace following a # multiple-delimiter start code. $1 gets the whole start code and # $2 gets all but one of the <s in the multiple-bracket case. ([A-Z]<(?:(<+)\s+)?) \| # Match multiple-bracket end codes. $3 gets the whitespace that # should be discarded before an end bracket but kept in other cases # and $4 gets the end brackets themselves. ($3 can be empty if the # construct is empty, like C<< >>, and all the white-space has been # gobbled up already, considered to be space after the opening # bracket. In this case we use look-behind to verify that there are # at least 2 spaces in a row before the ">".) (\s+\|(?<=\s\s))(>{2,}) \| (\s?>) # $5: simple end-codes \| ( # $6: stuff containing no start-codes or end-codes (?: [^A-Z\s>] \| (?: [A-Z](?!<) ) \| # whitespace is ok, but we don't want to eat the whitespace before # a multiple-bracket end code. # NOTE: we may still have problems with e.g. S<< >> (?: \s(?!\s>{2,}) ) )+ ) ) /xgo ) { DEBUG > 4 and print STDERR "\nParagraphic tokenstack = (@stack)\n"; if(defined $1) { my $bracket_count; # How many '<<<' in a row this has. Needed for # Pod::Simple::JustPod if(defined $2) { DEBUG > 3 and print STDERR "Found complex start-text code \"$1\"\n"; $bracket_count = length($2) + 1; push @stack, $bracket_count; # length of the necessary complex # end-code string } else { DEBUG > 3 and print STDERR "Found simple start-text code \"$1\"\n"; push @stack, 0; # signal that we're looking for simple $bracket_count = 1; } my $code = substr($1,0,1); if ('L' eq $code) { if ($inL) { $raw .= $1; $self->scream( $start_line, 'Nested L<> are illegal. Pretending inner one is ' . 'X<...> so can continue looking for other errors.'); $code = "X"; } else { $raw = ""; # reset raw content accumulator $inL = @stack; } } else { $raw .= $1 if $inL; } push @lineage, [ $code, {}, ]; # new node object # Tell Pod::Simple::JustPod how many brackets there were, but to save # space, not in the most usual case of there was just 1. It can be # inferred by the absence of this element. Similarly, if there is more # than one bracket, extract the white space between the final bracket # and the real beginning of the interior. Save that if it isn't just a # single space if ($self->{'_output_is_for_JustPod'} && $bracket_count > 1) { $lineage[-1][1]{'~bracket_count'} = $bracket_count; my $lspacer = substr($1, 1 + $bracket_count); $lineage[-1][1]{'~lspacer'} = $lspacer if $lspacer ne " "; } push @{ $lineage[-2] }, $lineage[-1]; } elsif(defined $4) { DEBUG > 3 and print STDERR "Found apparent complex end-text code \"$3$4\"\n"; # This is where it gets messy... if(! @stack) { # We saw " >>>>" but needed nothing. This is ALL just stuff then. DEBUG > 4 and print STDERR " But it's really just stuff.\n"; push @{ $lineage[-1] }, $3, $4; next; } elsif(!$stack[-1]) { # We saw " >>>>" but needed only ">". Back pos up. DEBUG > 4 and print STDERR " And that's more than we needed to close simple.\n"; push @{ $lineage[-1] }, $3; # That was a for-real space, too. pos($para) = pos($para) - length($4) + 1; } elsif($stack[-1] == length($4)) { # We found " >>>>", and it was exactly what we needed. Commonest case. DEBUG > 4 and print STDERR " And that's exactly what we needed to close complex.\n"; } elsif($stack[-1] < length($4)) { # We saw " >>>>" but needed only " >>". Back pos up. DEBUG > 4 and print STDERR " And that's more than we needed to close complex.\n"; pos($para) = pos($para) - length($4) + $stack[-1]; } else { # We saw " >>>>" but needed " >>>>>>". So this is all just stuff! DEBUG > 4 and print STDERR " But it's really just stuff, because we needed more.\n"; push @{ $lineage[-1] }, $3, $4; next; } #print STDERR "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n"; if ($3 ne " " && $self->{'_output_is_for_JustPod'}) { if ($3 ne "") { $lineage[-1][1]{'~rspacer'} = $3; } elsif ($lineage[-1][1]{'~lspacer'} eq " ") { # Here we had something like C<< >> which was a false positive delete $lineage[-1][1]{'~lspacer'}; } else { $lineage[-1][1]{'~rspacer'} = substr($lineage[-1][1]{'~lspacer'}, -1, 1); chop $lineage[-1][1]{'~lspacer'}; } } push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; # Keep the element from being childless if ($inL == @stack) { $lineage[-1][1]{'raw'} = $raw; $inL = 0; } pop @stack; pop @lineage; $raw .= $3.$4 if $inL; } elsif(defined $5) { DEBUG > 3 and print STDERR "Found apparent simple end-text code \"$5\"\n"; if(@stack and ! $stack[-1]) { # We're indeed expecting a simple end-code DEBUG > 4 and print STDERR " It's indeed an end-code.\n"; if(length($5) == 2) { # There was a space there: " >" push @{ $lineage[-1] }, ' '; } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element push @{ $lineage[-1] }, ''; # keep it from being really childless } if ($inL == @stack) { $lineage[-1][1]{'raw'} = $raw; $inL = 0; } pop @stack; pop @lineage; } else { DEBUG > 4 and print STDERR " It's just stuff.\n"; push @{ $lineage[-1] }, $5; } $raw .= $5 if $inL; } elsif(defined $6) { DEBUG > 3 and print STDERR "Found stuff \"$6\"\n"; push @{ $lineage[-1] }, $6; $raw .= $6 if $inL; # XXX does not capture multiplace whitespaces -- 'raw' ends up with # at most 1 leading/trailing whitespace, why not all of it? # Answer, because we deliberately trimmed it above } else { # should never ever ever ever happen DEBUG and print STDERR "AYYAYAAAAA at line ", __LINE__, "\n"; die "SPORK 512512!"; } } if(@stack) { # Uhoh, some sequences weren't closed. my $x= "..."; while(@stack) { push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; # Hmmmmm! my $code = (pop @lineage)->[0]; my $ender_length = pop @stack; if($ender_length) { --$ender_length; $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length); } else { $x = $code . "<$x>"; } } DEBUG > 1 and print STDERR "Unterminated $x sequence\n"; $self->whine($start_line, "Unterminated $x sequence", ); } return $treelet; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol) return stringify_lol($_[1]); } sub stringify_lol { # function: stringify_lol($lol) my $string_form = ''; _stringify_lol( $_[0] => \$string_form ); return $string_form; } sub _stringify_lol { # the real recursor my($lol, $to) = @_; for(my $i = 2; $i < @$lol; ++$i) { if( ref($lol->[$i] \|\| '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) { _stringify_lol( $lol->[$i], $to); # recurse! } else { $$to .= $lol->[$i]; } } return; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ sub _dump_curr_open { # return a string representation of the stack my $curr_open = $_[0]{'curr_open'}; return '[empty]' unless @$curr_open; return join '; ', map {; ($_->[0] eq '=for') ? ( ($_->[1]{'~really'} \|\| '=over') . ' ' . $_->[1]{'target'}) : $_->[0] } @$curr_open ; } ########################################################################### my %pretty_form = ( "\a" => '\a', # ding! "\b" => '\b', # BS "\e" => '\e', # ESC "\f" => '\f', # FF "\t" => '\t', # tab "\cm" => '\cm', "\cj" => '\cj', "\n" => '\n', # probably overrides one of either \cm or \cj '"' => '\"', '\\' => '\\\\', '$' => '\\$', '@' => '\\@', '%' => '\\%', '#' => '\\#', ); sub pretty { # adopted from Class::Classless # Not the most brilliant routine, but passable. # Don't give it a cyclic data structure! my @stuff = @_; # copy my $x; my $out = # join ",\n" . join ", ", map {; if(!defined($_)) { "undef"; } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') { $x = "[ " . pretty(@$_) . " ]" ; $x; } elsif(ref($_) eq 'SCALAR') { $x = "\\" . pretty($$_) ; $x; } elsif(ref($_) eq 'HASH') { my $hr = $_; $x = "{" . join(", ", map(pretty($_) . '=>' . pretty($hr->{$_}), sort keys %$hr ) ) . "}" ; $x; } elsif(!length($_)) { q{''} # empty string } elsif( $_ eq '0' # very common case or( m/^-?(?:[123456789]\d\|0)(?:\.\d+)?$/s and $_ ne '-0' # the strange case that RE lets thru ) ) { $_; } else { # Yes, explicitly name every character desired. There are shorcuts one # could make, but I (Karl Williamson) was afraid that some Perl # releases would have bugs in some of them. For example [A-Z] works # even on EBCDIC platforms to match exactly the 26 uppercase English # letters, but I don't know if it has always worked without bugs. It # seemed safest just to list the characters. # s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> s<([^ !"#'()+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{\|}~])> <$pretty_form{$1} \|\| '\\x{'.sprintf("%x", ord($1)).'}'>eg; #<$pretty_form{$1} \|\| '\\x'.(unpack("H2",$1))>eg; qq{"$_"}; } } @stuff; # $out =~ s/\n / /g if length($out) < 75; return $out; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ # A rather unsubtle method of blowing away all the state information # from a parser object so it can be reused. Provided as a utility for # backward compatibility in Pod::Man, etc. but not recommended for # general use. sub reinit { my $self = shift; foreach (qw(source_dead source_filename doc_has_started start_of_pod_block content_seen last_was_blank paras curr_open line_count pod_para_count in_pod ~tried_gen_errata all_errata errata errors_seen Title)) { delete $self->{$_}; } } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1; PK��]"\S�N��Text.pmnu��[�� require 5; package Pod::Simple::Text; use strict; use Carp (); use Pod::Simple::Methody (); use Pod::Simple (); use vars qw( @ISA $VERSION $FREAKYMODE); $VERSION = '3.42'; @ISA = ('Pod::Simple::Methody'); BEGIN { DEBUG = defined(&Pod::Simple::DEBUG) ? \&Pod::Simple::DEBUG : sub() {0} } use Text::Wrap 98.112902 (); $Text::Wrap::huge = 'overflow'; #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub new { my $self = shift; my $new = $self->SUPER::new(@_); $new->{'output_fh'} \|\|= STDOUT{IO}; $new->accept_target_as_text(qw( text plaintext plain )); $new->nix_X_codes(1); $new->nbsp_for_S(1); $new->{'Thispara'} = ''; $new->{'Indent'} = 0; $new->{'Indentstring'} = ' '; return $new; } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sub handle_text { $_[0]{'Thispara'} .= $_[1] } sub start_Para { $_[0]{'Thispara'} = '' } sub start_head1 { $_[0]{'Thispara'} = '' } sub start_head2 { $_[0]{'Thispara'} = '' } sub start_head3 { $_[0]{'Thispara'} = '' } sub start_head4 { $_[0]{'Thispara'} = '' } sub start_Verbatim { $_[0]{'Thispara'} = '' } sub start_item_bullet { $_[0]{'Thispara'} = $FREAKYMODE ? '' : '* ' } sub start_item_number { $_[0]{'Thispara'} = $FREAKYMODE ? '' : "$_[1]{'number'}. " } sub start_item_text { $_[0]{'Thispara'} = '' } sub start_over_bullet { ++$_[0]{'Indent'} } sub start_over_number { ++$_[0]{'Indent'} } sub start_over_text { ++$_[0]{'Indent'} } sub start_over_block { ++$_[0]{'Indent'} } sub end_over_bullet { --$_[0]{'Indent'} } sub end_over_number { --$_[0]{'Indent'} } sub end_over_text { --$_[0]{'Indent'} } sub end_over_block { --$_[0]{'Indent'} } # . . . . . Now the actual formatters: sub end_head1 { $_[0]->emit_par(-4) } sub end_head2 { $_[0]->emit_par(-3) } sub end_head3 { $_[0]->emit_par(-2) } sub end_head4 { $_[0]->emit_par(-1) } sub end_Para { $_[0]->emit_par( 0) } sub end_item_bullet { $_[0]->emit_par( 0) } sub end_item_number { $_[0]->emit_par( 0) } sub end_item_text { $_[0]->emit_par(-2) } sub start_L { $_[0]{'Link'} = $_[1] if $_[1]->{type} eq 'url' } sub end_L { if (my $link = delete $_[0]{'Link'}) { # Append the URL to the output unless it's already present. $_[0]{'Thispara'} .= " <$link->{to}>" unless $_[0]{'Thispara'} =~ /\b\Q$link->{to}/; } } sub emit_par { my($self, $tweak_indent) = splice(@_,0,2); my $indent = ' ' x ( 2 * $self->{'Indent'} + 4 + ($tweak_indent\|\|0) ); # Yes, 'STRING' x NEGATIVE gives '', same as 'STRING' x 0 $self->{'Thispara'} =~ s/$Pod::Simple::shy//g; my $out = Text::Wrap::wrap($indent, $indent, $self->{'Thispara'} .= "\n"); $out =~ s/$Pod::Simple::nbsp/ /g; print {$self->{'output_fh'}} $out, "\n"; $self->{'Thispara'} = ''; return; } # . . . . . . . . . . And then off by its lonesome: sub end_Verbatim { my $self = shift; $self->{'Thispara'} =~ s/$Pod::Simple::nbsp/ /g; $self->{'Thispara'} =~ s/$Pod::Simple::shy//g; my $i = ' ' x ( 2 * $self->{'Indent'} + 4); #my $i = ' ' x (4 + $self->{'Indent'}); $self->{'Thispara'} =~ s/^/$i/mg; print { $self->{'output_fh'} } '', $self->{'Thispara'}, "\n\n" ; $self->{'Thispara'} = ''; return; } #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1; __END__ =head1 NAME Pod::Simple::Text -- format Pod as plaintext =head1 SYNOPSIS perl -MPod::Simple::Text -e \ "exit Pod::Simple::Text->filter(shift)->any_errata_seen" \ thingy.pod =head1 DESCRIPTION This class is a formatter that takes Pod and renders it as wrapped plaintext. Its wrapping is done by L<Text::Wrap>, so you can change C<$Text::Wrap::columns> as you like. This is a subclass of L<Pod::Simple> and inherits all its methods. =head1 SEE ALSO L<Pod::Simple>, L<Pod::Simple::TextContent>, L<Pod::Text> =head1 SUPPORT Questions or discussion about POD and Pod::Simple should be sent to the pod-people@perl.org mail list. Send an empty email to pod-people-subscribe@perl.org to subscribe. This module is managed in an open GitHub repository, L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! Patches against Pod::Simple are welcome. Please send bug reports to <bug-pod-simple@rt.cpan.org>. =head1 COPYRIGHT AND DISCLAIMERS Copyright (c) 2002 Sean M. Burke. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. =head1 AUTHOR Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. But don't bother him, he's retired. Pod::Simple is maintained by: =over =item * Allison Randal C<allison@perl.org> =item * Hans Dieter Pearcey C<hdp@cpan.org> =item * David E. Wheeler C<dwheeler@cpan.org> =back =cut PK��]"\��i�g��g��XHTML.pmnu��[��=pod =head1 NAME Pod::Simple::XHTML -- format Pod as validating XHTML =head1 SYNOPSIS use Pod::Simple::XHTML; my $parser = Pod::Simple::XHTML->new(); ... $parser->parse_file('path/to/file.pod'); =head1 DESCRIPTION This class is a formatter that takes Pod and renders it as XHTML validating HTML. This is a subclass of L<Pod::Simple::Methody> and inherits all its methods. The implementation is entirely different than L<Pod::Simple::HTML>, but it largely preserves the same interface. =head2 Minimal code use Pod::Simple::XHTML; my $psx = Pod::Simple::XHTML->new; $psx->output_string(\my $html); $psx->parse_file('path/to/Module/Name.pm'); open my $

| ver. 1.6 | Github | . | PHP 8.2.30 | ??????????? ?????????: 0.01 | proxy | phpinfo | ???????????