curl-curl/tests/test1173.pl
Viktor Szakats 0260e8465a
GHA/checksrc: expand spellcheck, fix issues found
- codespell: break logic out into its own runnable script. Allowing
  to run it on local machines.
- codespell: install via `pip`, bump to latest version.
- codespell: show version number in CI log.
- codespell: drop no longer needed word exception: `msdos`.
- codespell: include all curl source tree, except `packages` and
  `winbuild`. Drop an obsolete file exclusion.
- add new spellchecker job using the `typos` tool. It includes
  the codespell dictionary and a couple more. Use linuxbrew to install
  it. This takes 10 seconds, while installing via `cargo` from source
  would take over a minute.
- codespell: introduce an inline ignore filter compatible with `cspell`
  Make `typos` recognize it, too. Move single exceptions inline.

Fix new typos found. Also rename variables and words to keep
spellchecking exceptions to a minimum. This involves touching some tests.
Also switch base64 strings to `%b64[]` to avoid false positives.

Ref: https://github.com/crate-ci/typos/blob/master/docs/reference.md
Ref: https://github.com/codespell-project/codespell?tab=readme-ov-file#inline-ignore
Ref: https://github.com/codespell-project/codespell/issues/1212#issuecomment-1721152455
Ref: https://cspell.org/docs/Configuration/document-settings

Closes #17905
2025-07-21 16:09:01 +02:00

399 lines
11 KiB
Perl
Executable File

#!/usr/bin/env perl
#***************************************************************************
# _ _ ____ _
# Project ___| | | | _ \| |
# / __| | | | |_) | |
# | (__| |_| | _ <| |___
# \___|\___/|_| \_\_____|
#
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
###########################################################################
#
# Scan manpage(s) and detect some simple and yet common formatting mistakes.
#
# Output all deviances to stderr.
use strict;
use warnings;
use File::Basename;
# The first command line argument names the symbols-in-versions file.
my $symbolsinversions = shift @ARGV;

# Whatever is left on the command line is the list of manpages to scan.
my @manpages = @ARGV;

# Number of counted problems so far; used as the exit status at the end.
my $errors = 0;

my %docsdirs;    # directory name => count of given manpages living there
my %optblessed;  # option page section name => 1-based expected position
my %funcblessed; # function page section name => 1-based expected position

# Mandatory sections of an option manpage, in the order they must appear.
my @optorder = (
    'NAME', 'SYNOPSIS', 'DESCRIPTION',
    # 'DEFAULT' is not listed: CURLINFO_ has no default
    'PROTOCOLS', 'EXAMPLE', 'AVAILABILITY', 'RETURN VALUE', 'SEE ALSO'
);

# Mandatory sections of a function manpage, in the order they must appear.
my @funcorder = (
    'NAME', 'SYNOPSIS', 'DESCRIPTION',
    'EXAMPLE', 'AVAILABILITY', 'RETURN VALUE', 'SEE ALSO'
);

my %shline; # section => line number
my %symbol; # symbol name => symbol name, filled in by allsymbols()

# Some CURLINFO_ symbols are not actual options for curl_easy_getinfo. They
# are flagged as "deprecated" here so that they do not trigger link-warnings.
my %deprecated = map { $_ => 1 } qw(
    CURLINFO_TEXT
    CURLINFO_HEADER_IN
    CURLINFO_HEADER_OUT
    CURLINFO_DATA_IN
    CURLINFO_DATA_OUT
    CURLINFO_SSL_DATA_IN
    CURLINFO_SSL_DATA_OUT
    CURLOPT_EGDSOCKET
    CURLOPT_RANDOM_FILE
);
# Load the symbols-in-versions file named by $symbolsinversions.
#
# Every line shaped like "<name> <info>" registers <name> in %symbol. When
# <info> contains two version-looking number fields separated by spaces
# (presumably the introduced and deprecated versions - confirm against the
# file format), the symbol is also entered into %deprecated.
#
# Takes no arguments and dies if the file cannot be opened.
sub allsymbols {
    # $! holds the OS error text for the failed open
    open(my $f, "<", "$symbolsinversions") ||
        die "$symbolsinversions: $!";
    # read into a lexical so that the caller's $_ is left alone
    while(my $entry = <$f>) {
        if($entry =~ /^([^ ]*) +(.*)/) {
            my ($name, $info) = ($1, $2);
            $symbol{$name} = $name;
            if($info =~ /([0-9.]+) +([0-9.]+)/) {
                $deprecated{$name} = $info;
            }
        }
    }
    close($f);
}
# Cache of "name.section" manpage references already known to be fine.
# curl.1 is listed up front, so it is accepted without a file lookup.
my %ref = ('curl.1' => 1);
# Check that a referenced manpage exists.
#
# Arguments:
#   $f    - name of the referenced page
#   $sec  - manpage section of the referenced page
#   $file - file holding the reference (used in the diagnostic)
#   $line - line number of the reference (used in the diagnostic)
#
# The page is looked up as "$f.$sec" in each directory listed in %docsdirs.
# Hits are remembered in %ref so they are not looked up again. A miss is
# reported on stderr and counted in $errors.
sub checkref {
    my ($f, $sec, $file, $line) = @_;
    my $page = "$f.$sec";

    #print STDERR "check $page\n";

    # already verified earlier
    return if($ref{$page});

    foreach my $dir (keys %docsdirs) {
        next if(! -f "$dir/$page");
        # found it, remember that for next time
        $ref{$page} = 1;
        return;
    }

    print STDERR "$file:$line broken reference to $f($sec)\n";
    $errors++;
}
# Words that look like options but are not. scanmanpage() lets these be
# mentioned in \fB/\fI markup without a trailing "(section)".
my %allownonref = map { $_ => 1 } qw(
    CURLINFO_TEXT
    CURLINFO_HEADER_IN
    CURLINFO_HEADER_OUT
    CURLINFO_DATA_IN
    CURLINFO_DATA_OUT
    CURLINFO_SSL_DATA_IN
    CURLINFO_SSL_DATA_OUT
);
# Scan one manpage and report formatting mistakes on stderr.
#
# Argument:
#   $file - path of the nroff manpage to check
#
# Checks done per line: SEE ALSO entry format and separators, '\n' escaping
# in EXAMPLE, ".nf" as first SYNOPSIS line, lines starting with a single
# quote, unterminated or suspicious \f sequences, curl references without a
# section, untagged curl calls and options, and trailing whitespace.
#
# For libcurl pages (file name matching CURL*.3 or curl_*.3) that are not
# listed in %deprecated, it also requires an EXAMPLE section and verifies the
# presence and order of the mandatory sections.
#
# Counted problems increment the file-level $errors. A page that is merely an
# ".so" referral is skipped. Dies if $file cannot be opened.
sub scanmanpage {
    my ($file) = @_;
    my $reqex = 0;     # true when the page needs EXAMPLE and section checks
    my $inseealso = 0; # set once the SEE ALSO header is seen, never cleared
    my $inexample = 0;
    my $insynop = 0;
    my $exsize = 0;    # number of lines seen inside EXAMPLE
    my $synopsize = 0; # number of lines seen inside SYNOPSIS
    my $optpage = 0; # option or function
    my @sh;            # section names in the order they appear
    my $SH="";         # name of the section currently being read
    my @separators;    # separator found after each SEE ALSO entry
    my @sepline;       # line number of each SEE ALSO entry

    open(my $m, "<", "$file") ||
        die "test1173.pl could not open $file: $!";

    # the dot before the section digit is escaped to match it literally
    if($file =~ /[\/\\](CURL|curl_)([^\/\\]*)\.3/) {
        my ($prefix, $rest) = ($1, $2);
        # This is a manpage for libcurl. It requires an example unless it's
        # considered deprecated.
        $reqex = 1 unless defined $deprecated{'CURL'.$rest};
        if($prefix eq "CURL") {
            $optpage = 1;
        }
    }

    my $line = 1;
    while(<$m>) {
        chomp;
        # literal ".so" request only; an unescaped dot would match any line
        # whose second to fourth characters are "so "
        if($_ =~ /^\.so /) {
            # this manpage is just a referral
            close($m);
            return;
        }
        if(($_ =~ /^\.SH SYNOPSIS/i) && ($reqex)) {
            # this is for libcurl manpage SYNOPSIS checks
            $insynop = 1;
            $inexample = 0;
        }
        elsif($_ =~ /^\.SH EXAMPLE/i) {
            $insynop = 0;
            $inexample = 1;
        }
        elsif($_ =~ /^\.SH \"SEE ALSO\"/i) {
            $inseealso = 1;
        }
        elsif($_ =~ /^\.SH/i) {
            $insynop = 0;
            $inexample = 0;
        }
        elsif($inseealso) {
            if($_ =~ /^\.BR (.*)/i) {
                my $f = $1;
                if($f =~ /^(lib|)curl/i) {
                    $f =~ s/[\n\r]//g;
                    # strip "name (section)" plus separator; anything left
                    # over afterwards means the entry is malformed
                    if($f =~ s/([a-z_0-9-]*) \(([13])\)([, ]*)//i) {
                        push @separators, $3;
                        push @sepline, $line;
                        checkref($1, $2, $file, $line);
                    }
                    if($f !~ /^ *$/) {
                        print STDERR "$file:$line bad SEE ALSO format\n";
                        $errors++;
                    }
                }
                else {
                    # non-curl reference: only track its separator
                    if($f =~ /.*(, *)\z/) {
                        push @separators, $1;
                        push @sepline, $line;
                    }
                    else {
                        push @separators, " ";
                        push @sepline, $line;
                    }
                }
            }
        }
        elsif($inexample) {
            $exsize++;
            # reported but not counted in $errors
            if($_ =~ /[^\\]\\n/) {
                print STDERR "$file:$line '\\n' need to be '\\\\n'!\n";
            }
        }
        elsif($insynop) {
            $synopsize++;
            # reported but not counted in $errors
            if(($synopsize == 1) && ($_ !~ /\.nf/)) {
                print STDERR "$file:$line:1:ERROR: be .nf for proper formatting\n";
            }
        }

        if($_ =~ /^\.SH ([^\r\n]*)/i) {
            my $n = $1;
            # remove enclosing quotes
            $n =~ s/\"(.*)\"\z/$1/;
            push @sh, $n;
            $shline{$n} = $line;
            $SH = $n;
        }

        if($_ =~ /^\'/) {
            print STDERR "$file:$line line starts with single quote!\n";
            $errors++;
        }
        if($_ =~ /\\f([BI])(.*)/) {
            my ($format, $rest) = ($1, $2);
            if($rest !~ /\\fP/) {
                print STDERR "$file:$line missing \\f${format} terminator!\n";
                $errors++;
            }
        }

        # check every "\fBcurl...(N)" style reference on the line, consuming
        # them from a copy so that $_ stays intact for the checks below
        my $c = $_;
        while($c =~ s/\\f([BI])((lib|)curl[a-z_0-9-]*)\(([13])\)//i) {
            checkref($2, $4, $file, $line);
        }
        if(($_ =~ /\\f([BI])((libcurl|CURLOPT_|CURLSHOPT_|CURLINFO_|CURLMOPT_|curl_easy_|curl_multi_|curl_url|curl_mime|curl_global|curl_share)[a-zA-Z_0-9-]+)(.)/) &&
           ($4 ne "(")) {
            my $word = $2;
            if(!$allownonref{$word}) {
                print STDERR "$file:$line curl ref to $word without section\n";
                $errors++;
            }
        }
        if($_ =~ /(.*)\\f([^BIP])/) {
            my ($pre, $format) = ($1, $2);
            if($pre !~ /\\\z/) {
                # only if there wasn't another backslash before the \f
                print STDERR "$file:$line suspicious \\f format!\n";
                $errors++;
            }
        }
        # $1/$2 of the second match survive the third test, because that
        # test only passes when its own pattern fails to match
        if(($SH =~ /^(DESCRIPTION|RETURN VALUE|AVAILABILITY)/i) &&
           ($_ =~ /(.*)((curl_multi|curl_easy|curl_url|curl_global|curl_share)[a-zA-Z_0-9-]+)/) &&
           ($1 !~ /\\fI$/)) {
            print STDERR "$file:$line unrefed curl call: $2\n";
            $errors++;
        }
        if($optpage && $SH && ($SH !~ /^(SYNOPSIS|EXAMPLE|NAME|SEE ALSO)/i) &&
           ($_ =~ /(.*)(CURL(OPT_|MOPT_|INFO_|SHOPT_)[A-Z0-9_]*)/)) {
            # an option with its own manpage, check that it is tagged
            # for linking
            my ($pref, $symbol) = ($1, $2);
            if($deprecated{$symbol}) {
                # let it be
            }
            elsif($pref !~ /\\fI\z/) {
                print STDERR "$file:$line option $symbol missing \\fI tagging\n";
                $errors++;
            }
        }
        if($_ =~ /[ \t]+$/) {
            print STDERR "$file:$line trailing whitespace\n";
            $errors++;
        }
        $line++;
    }
    close($m);

    if(@separators) {
        # all except the last one need comma
        for my $idx (0 .. $#separators - 1) {
            my $sep = $separators[$idx];
            if($sep ne ",") {
                printf STDERR "$file:%d: bad not-last SEE ALSO separator: '%s'\n",
                    $sepline[$idx], $sep;
                $errors++;
            }
        }
        # the last one should not do comma
        my $sep = $separators[$#separators];
        if($sep eq ",") {
            printf STDERR "$file:%d: superfluous comma separator\n",
                $sepline[$#separators];
            $errors++;
        }
    }

    if($reqex) {
        # only for libcurl options man-pages

        my $shcount = scalar(@sh); # before @sh gets shifted
        if($exsize < 2) {
            print STDERR "$file:$line missing EXAMPLE section\n";
            $errors++;
        }

        if($shcount < 3) {
            print STDERR "$file:$line too few manpage sections!\n";
            $errors++;
            return;
        }

        my $got = "start";
        my $i = 0;        # position of the latest mandatory section found
        my $shused = 1;   # position of the mandatory section expected next
        my @shorig = @sh;
        my @order = $optpage ? @optorder : @funcorder;
        my $blessed = $optpage ? \%optblessed : \%funcblessed;

        # walk the sections in page order; non-mandatory ones are skipped
        while($got) {
            my $finesh;
            $got = shift(@sh);
            if($got) {
                if($$blessed{$got}) {
                    $i = $$blessed{$got};
                    $finesh = $got; # a mandatory one
                }
            }
            if($i && defined($finesh)) {
                # mandatory section
                if($i != $shused) {
                    printf STDERR "$file:%u Got %s, when %s was expected\n",
                        $shline{$finesh},
                        $finesh,
                        $order[$shused-1];
                    $errors++;
                    return;
                }
                $shused++;
                if($i == scalar(@order)) {
                    # last mandatory one, exit
                    last;
                }
            }
        }

        if($i != scalar(@order)) {
            # the page may hold fewer than $i+1 sections; print an empty
            # name then instead of warning about an undefined value
            my $found = defined($shorig[$i]) ? $shorig[$i] : "";
            printf STDERR "$file:$line missing mandatory section: %s\n",
                $order[$i];
            printf STDERR "$file:$line section found at index %u: '%s'\n",
                $i, $found;
            printf STDERR " Found %u used sections\n", $shcount;
            $errors++;
        }
    }
}
# Main: load the symbol list, set up the section order lookups, then scan
# every manpage given on the command line.

allsymbols();

# sanity check: CURLALTSVC_H1 must be among the symbols just loaded
if(!$symbol{'CURLALTSVC_H1'}) {
    print STDERR "didn't get the symbols-in-version!\n";
    # this is a failure, so do not exit with status 0
    exit 1;
}

# map each mandatory section name to its 1-based position
my $ind = 1;
for my $s (@optorder) {
    $optblessed{$s} = $ind++;
}
$ind = 1;
for my $s (@funcorder) {
    $funcblessed{$s} = $ind++;
}

# collect the directories first: checkref() searches all of them, so they
# must be known before any page is scanned
for my $m (@manpages) {
    $docsdirs{dirname($m)}++;
}

for my $m (@manpages) {
    scanmanpage($m);
}

print STDERR "ok\n" if(!$errors);

# Only the low 8 bits of the exit status reach the parent process, so cap
# the count to keep e.g. 256 errors from looking like success.
exit($errors > 255 ? 255 : $errors);