wikiheaders.pl: A bunch of work on correct wordwrap and (de)wikify.

This commit is contained in:
Ryan C. Gordon 2021-07-14 08:11:18 -04:00
parent 0ac13dc27f
commit d115857c6f
No known key found for this signature in database
GPG Key ID: FA148B892AB48044

View File

@ -103,27 +103,36 @@ sub wordwrap {
my $retval = ''; my $retval = '';
while ($str =~ s/(.*?)(\n+\`\`\`.*?\`\`\`\n+|\n+\<syntaxhighlight.*?\<\/syntaxhighlight\>\n+)//ms) { #print("\n\nWORDWRAP:\n\n$str\n\n\n");
while ($str =~ s/(.*?)(\`\`\`.*?\`\`\`|\<syntaxhighlight.*?\<\/syntaxhighlight\>)//ms) {
#print("\n\nWORDWRAP BLOCK:\n\n$1\n\n ===\n\n$2\n\n\n");
$retval .= wordwrap_paragraphs($1); # wrap it. $retval .= wordwrap_paragraphs($1); # wrap it.
$retval .= $2; # don't wrap it. $retval .= "$2\n\n"; # don't wrap it.
} }
$retval .= wordwrap_paragraphs($str); # wrap what's left. $retval .= wordwrap_paragraphs($str); # wrap what's left.
$retval =~ s/\n+$//; $retval =~ s/\n+$//;
#print("\n\nWORDWRAP DONE:\n\n$retval\n\n\n");
return $retval; return $retval;
} }
# This assumes you're moving from Markdown (in the Doxygen data) to Wiki, which
sub wikify { # is why the 'md' section is so sparse.
sub wikify_chunk {
my $wikitype = shift; my $wikitype = shift;
my $str = shift; my $str = shift;
my $codelang = shift;
my $code = shift;
#print("\n\nWIKIFY CHUNK:\n\n$str\n\n\n");
if ($wikitype eq 'mediawiki') { if ($wikitype eq 'mediawiki') {
# Convert obvious SDL things to wikilinks. # Convert obvious SDL things to wikilinks.
$str =~ s/\b(SDL_[a-zA-Z0-9_]+)/[[$1]]/gms; $str =~ s/\b(SDL_[a-zA-Z0-9_]+)/[[$1]]/gms;
# Make some Markdown things into MediaWiki... # Make some Markdown things into MediaWiki...
$str =~ s/\`\`\`(c\+\+|c)(.*?)\`\`\`/<syntaxhighlight lang='$1'>$2<\/syntaxhighlight>/gms;
# <code></code> is also popular. :/ # <code></code> is also popular. :/
$str =~ s/\`(.*?)\`/<code>$1<\/code>/gms; $str =~ s/\`(.*?)\`/<code>$1<\/code>/gms;
@ -139,71 +148,55 @@ sub wikify {
# bullets # bullets
$str =~ s/^\- /* /gm; $str =~ s/^\- /* /gm;
if (defined $code) {
$str .= "<syntaxhighlight lang='$codelang'>$code<\/syntaxhighlight>";
}
} elsif ($wikitype eq 'md') { } elsif ($wikitype eq 'md') {
# Convert obvious SDL things to wikilinks. # Convert obvious SDL things to wikilinks.
$str =~ s/\b(SDL_[a-zA-Z0-9_]+)/[$1]($1)/gms; $str =~ s/\b(SDL_[a-zA-Z0-9_]+)/[$1]($1)/gms;
if (defined $code) {
$str .= "```$codelang$code```";
} }
}
#print("\n\nWIKIFY CHUNK DONE:\n\n$str\n\n\n");
return $str; return $str;
} }
sub dewikify { sub wikify {
my $wikitype = shift; my $wikitype = shift;
my $str = shift; my $str = shift;
return '' if not defined $str; my $retval = '';
my @lines = split /\n/, $str;
return '' if scalar(@lines) == 0; #print("WIKIFY WHOLE:\n\n$str\n\n\n");
while ($str =~ s/\A(.*?)\`\`\`(c\+\+|c)(.*?)\`\`\`//ms) {
$retval .= wikify_chunk($wikitype, $1, $2, $3);
}
$retval .= wikify_chunk($wikitype, $str, undef, undef);
#print("WIKIFY WHOLE DONE:\n\n$retval\n\n\n");
return $retval;
}
sub dewikify_chunk {
my $wikitype = shift;
my $str = shift;
my $codelang = shift;
my $code = shift;
#print("\n\nDEWIKIFY CHUNK:\n\n$str\n\n\n");
my $iwikitype = 0;
if ($wikitype eq 'mediawiki') { if ($wikitype eq 'mediawiki') {
$iwikitype = 1;
} elsif ($wikitype eq 'md') {
$iwikitype = 2;
} else {
die("Unexpected wikitype '$wikitype'\n");
}
while (1) {
my $l = shift @lines;
last if not defined $l;
chomp($l);
$l =~ s/\A\s*//;
$l =~ s/\s*\Z//;
next if ($l eq '');
next if ($iwikitype == 1) and ($l =~ /\A\= .*? \=\Z/);
next if ($iwikitype == 1) and ($l =~ /\A\=\= .*? \=\=\Z/);
next if ($iwikitype == 2) and ($l =~ /\A\#\# /);
unshift @lines, $l;
last;
}
while (1) {
my $l = pop @lines;
last if not defined $l;
chomp($l);
$l =~ s/\A\s*//;
$l =~ s/\s*\Z//;
next if ($l eq '');
push @lines, $l;
last;
}
$str = '';
foreach (@lines) {
chomp;
s/\A\s*//;
s/\s*\Z//;
$str .= "$_\n";
}
if ($iwikitype == 1) { #($wikitype eq 'mediawiki')
# Doxygen supports Markdown (and it just simply looks better than MediaWiki # Doxygen supports Markdown (and it just simply looks better than MediaWiki
# when looking at the raw headers, so do some conversions here as necessary. # when looking at the raw headers), so do some conversions here as necessary.
$str =~ s/\[\[(SDL_[a-zA-Z0-9_]+)\]\]/$1/gms; # Dump obvious wikilinks. $str =~ s/\[\[(SDL_[a-zA-Z0-9_]+)\]\]/$1/gms; # Dump obvious wikilinks.
# convert mediawiki syntax highlighting to Markdown backticks.
$str =~ s/\<syntaxhighlight lang='?(.*?)'?>(.*?)<\/syntaxhighlight>/```$1$2```/gms;
# <code></code> is also popular. :/ # <code></code> is also popular. :/
$str =~ s/\<code>(.*?)<\/code>/`$1`/gms; $str =~ s/\<code>(.*?)<\/code>/`$1`/gms;
@ -220,9 +213,36 @@ sub dewikify {
$str =~ s/^\* /- /gm; $str =~ s/^\* /- /gm;
} }
if (defined $code) {
$str .= "```$codelang$code```";
}
#print("\n\nDEWIKIFY CHUNK DONE:\n\n$str\n\n\n");
return $str; return $str;
} }
# Convert a piece of wiki text into the Markdown-flavoured text kept in the
# headers.
#
# Arguments:
#   $wikitype - 'mediawiki' or 'md'; selects which leading heading syntax is
#               removed, and is handed on to dewikify_chunk().
#   $str      - the wiki text to convert; may be undef.
#
# Returns the converted text, or '' when $str is undef.
#
# The text is cut at every <syntaxhighlight> element. Each cut hands
# dewikify_chunk() the prose before the element together with the element's
# language and code; whatever follows the last element is passed with no
# code attached.
sub dewikify {
    my $wikitype = shift;
    my $str = shift;
    return '' if not defined $str;

    #print("DEWIKIFY WHOLE:\n\n$str\n\n\n");

    # Remove the heading that opens the text. A heading is a single line, so
    # the patterns use [^\n] rather than '.': with /s in effect a '.' would
    # let a first line that merely starts with "= " swallow the lines after
    # it, up to the next " =" found anywhere in the text.
    if ($wikitype eq 'mediawiki') {
        $str =~ s/\A\s*\= [^\n]*? \=\s*?\n+//;
        $str =~ s/\A\s*\=\= [^\n]*? \=\=\s*?\n+//;
    } elsif ($wikitype eq 'md') {
        # NOTE(review): only a leading "## " heading is removed here; confirm
        # whether other heading levels can open the text.
        $str =~ s/\A\s*\#\# [^\n]*\n+//;
    }

    my $retval = '';
    while ($str =~ s/\A(.*?)<syntaxhighlight lang='?(.*?)'?>(.*?)<\/syntaxhighlight\>//ms) {
        # Copy the captures first: $1..$3 are reset by the next successful
        # match, and the callee runs regexes of its own.
        my ($prose, $codelang, $code) = ($1, $2, $3);
        $retval .= dewikify_chunk($wikitype, $prose, $codelang, $code);
    }
    $retval .= dewikify_chunk($wikitype, $str, undef, undef);

    #print("DEWIKIFY WHOLE DONE:\n\n$retval\n\n\n");

    return $retval;
}
sub usage { sub usage {
die("USAGE: $0 <source code git clone path> <wiki git clone path> [--copy-to-headers|--copy-to-wiki] [--warn-about-missing]\n\n"); die("USAGE: $0 <source code git clone path> <wiki git clone path> [--copy-to-headers|--copy-to-wiki] [--warn-about-missing]\n\n");
} }
@ -280,9 +300,24 @@ while (readdir(DH)) {
chomp; chomp;
push @templines, $_; push @templines, $_;
last if /\A\s*\*\/\Z/; last if /\A\s*\*\/\Z/;
if (s/\A\s*\*\s*\`\`\`/```/) { # this is a hack, but a lot of other code relies on the whitespace being trimmed, but we can't trim it in code blocks...
$str .= "$_\n";
while (<FH>) {
chomp;
push @templines, $_;
s/\A\s*\*\s?//;
if (s/\A\s*\`\`\`/```/) {
$str .= "$_\n";
last;
} else {
$str .= "$_\n";
}
}
} else {
s/\A\s*\*\s*//; s/\A\s*\*\s*//;
$str .= "$_\n"; $str .= "$_\n";
} }
}
my $decl = <FH>; my $decl = <FH>;
chomp($decl); chomp($decl);
@ -341,6 +376,7 @@ while (readdir(DH)) {
} }
#print("$fn:\n$str\n\n"); #print("$fn:\n$str\n\n");
$headerfuncs{$fn} = $str; $headerfuncs{$fn} = $str;
$headerdecls{$fn} = $decl; $headerdecls{$fn} = $decl;
$headerfuncslocation{$fn} = $dent; $headerfuncslocation{$fn} = $dent;
@ -424,8 +460,7 @@ while (readdir(DH)) {
die("Unexpected wiki file type. Fixme!\n"); die("Unexpected wiki file type. Fixme!\n");
} }
my $str = ($current_section eq 'Code Examples') ? $orig : $_; $sections{$current_section} .= "$orig\n";
$sections{$current_section} .= "$str\n";
} }
close(FH); close(FH);
@ -646,13 +681,24 @@ if ($copy_direction == 1) { # --copy-to-headers
@doxygenlines = (@briefsplit, @doxygenlines); @doxygenlines = (@briefsplit, @doxygenlines);
my $remarks = ''; my $remarks = '';
# !!! FIXME: wordwrap and wikify might handle this, now.
while (@doxygenlines) { while (@doxygenlines) {
last if $doxygenlines[0] =~ /\A\\/; # some sort of doxygen command, assume we're past the general remarks. last if $doxygenlines[0] =~ /\A\\/; # some sort of doxygen command, assume we're past the general remarks.
my $l = shift @doxygenlines; my $l = shift @doxygenlines;
if ($l =~ /\A\`\`\`/) { # syntax highlighting, don't reformat.
$remarks .= "$l\n";
while ((@doxygenlines) && (not $l =~ /\`\`\`\Z/)) {
$l = shift @doxygenlines;
$remarks .= "$l\n";
}
} else {
$l =~ s/\A\s*//; $l =~ s/\A\s*//;
$l =~ s/\s*\Z//; $l =~ s/\s*\Z//;
$remarks .= "$l\n"; $remarks .= "$l\n";
} }
}
#print("REMARKS:\n\n $remarks\n\n");
$remarks = wordwrap(wikify($wikitype, $remarks)); $remarks = wordwrap(wikify($wikitype, $remarks));
$remarks =~ s/\A\s*//; $remarks =~ s/\A\s*//;