mirror of
https://github.com/curl/curl.git
synced 2026-04-11 12:01:42 +08:00
Instead of stdin. To simplify the command-line, and allow using a safe and portable `system()` call from `badwords-all`. Ref: https://perldoc.perl.org/functions/system Closes #20970
338 lines
7.3 KiB
Perl
Executable File
338 lines
7.3 KiB
Perl
Executable File
#!/usr/bin/env perl
|
|
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
|
|
#
|
|
# SPDX-License-Identifier: curl
|
|
#
|
|
# bad[:=]correct
|
|
#
|
|
# If separator is '=', the string will be compared case sensitively.
|
|
# If separator is ':', the check is done case insensitively.
|
|
#
|
|
# To add white listed uses of bad words that are removed before checking for
|
|
# the bad ones:
|
|
#
|
|
# ---(accepted word)
|
|
# ---:[path]:(accepted word)
|
|
#
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
use File::Basename;
|
|
|
|
#
|
|
## States
|
|
#
|
|
# 0 - default, initial state
|
|
# 1 - there was a slash
|
|
# 2 - quoted string
|
|
# 3 - // comment
|
|
# 4 - /* comment
|
|
# 5 - asterisk found within a /* comment
|
|
# 6 - #include line
|
|
# 7 - backslash in a string
|
|
#
|
|
## Flags
|
|
#
|
|
# 1 - include preprocessor line, ignore strings
|
|
|
|
sub srcline {
|
|
my ($state, $flags, $l) = @_;
|
|
my $line = "";
|
|
|
|
if(($state == 0) && ($l =~ /^ *\# *include/)) {
|
|
# preprocessor include line
|
|
$flags |= 1;
|
|
}
|
|
else {
|
|
# not preprocessor
|
|
$flags &= ~1;
|
|
}
|
|
|
|
if($state == 3) {
|
|
# // ended on the prev line, go back to init
|
|
$state = 0;
|
|
}
|
|
|
|
my @c = split(//, $l);
|
|
|
|
# state machine this line
|
|
for my $c (@c) {
|
|
if($state == 1) {
|
|
# we had a slash
|
|
if($c eq "/") {
|
|
# // confirmed, the rest of the line is a comment
|
|
$line .= "//";
|
|
$state = 3;
|
|
}
|
|
elsif($c eq "*") {
|
|
# /* confirmed
|
|
$state = 4;
|
|
$line .= "/*";
|
|
}
|
|
else {
|
|
# back to normal
|
|
$line .= " ";
|
|
$state = 0;
|
|
}
|
|
}
|
|
elsif($state == 2) {
|
|
# a string
|
|
if($c eq "\\") {
|
|
$line .= "\\";
|
|
$state = 7;
|
|
}
|
|
elsif($c eq "\"") {
|
|
# end of the string
|
|
$line .= "\"";
|
|
$state = 0;
|
|
}
|
|
else {
|
|
$line .= $c;
|
|
}
|
|
}
|
|
elsif($state == 3) {
|
|
# a // comment
|
|
$line .= $c;
|
|
}
|
|
elsif($state == 4) {
|
|
# a /* comment
|
|
if($c eq "*") {
|
|
# could be a comment close
|
|
$state = 5;
|
|
}
|
|
else {
|
|
$line .= $c;
|
|
}
|
|
}
|
|
elsif($state == 5) {
|
|
if($c eq "/") {
|
|
# a /* */ comment ended here */
|
|
$line .= "*/";
|
|
$state = 0;
|
|
}
|
|
else {
|
|
# the /* comment continues
|
|
$line .= "*$c";
|
|
$state = 4;
|
|
}
|
|
}
|
|
elsif($state == 7) {
|
|
# the prev was a backslash in a string
|
|
$line .= $c;
|
|
# switch back to normal string
|
|
$state = 2;
|
|
}
|
|
else {
|
|
if($c eq "/") {
|
|
$state = 1; # got a slash
|
|
}
|
|
elsif(($c eq "\"") && !($flags & 1)) {
|
|
# start of a string, not within a preprocessor line
|
|
$line .= "\"";
|
|
$state = 2;
|
|
}
|
|
elsif($c eq "\n") {
|
|
$line .= "\n";
|
|
}
|
|
else {
|
|
$line .= " ";
|
|
}
|
|
}
|
|
}
|
|
return $state, $flags, $line;
|
|
}
|
|
|
|
sub sourcecode {
|
|
my ($f) = @_;
|
|
my $state = 0;
|
|
my $flags = 0;
|
|
my @lines;
|
|
my $line;
|
|
open(F, "<$f");
|
|
while(<F>) {
|
|
my $l = $_;
|
|
($state, $flags, $line) = srcline($state, $flags, $l);
|
|
push @lines, $line;
|
|
}
|
|
close(F);
|
|
return @lines;
|
|
}
|
|
|
|
my @whitelist = (
|
|
# ignore what looks like URLs
|
|
'(^|\W)((https|http|ftp):\/\/[a-z0-9\-._~%:\/?\#\[\]\@!\$&\'\(\)*+,;=]+)',
|
|
# remove bolded sections
|
|
'\*\*.*?\*\*',
|
|
# remove backticked texts
|
|
'\`.*?\`'
|
|
);
|
|
my %alt;
|
|
my %exactcase;
|
|
|
|
my %wl;
|
|
|
|
my @w;
|
|
my @exact;
|
|
my $file = shift @ARGV;
|
|
open(CONFIG, "<$file") or die "Cannot open '$file': $!";
|
|
while(<CONFIG>) {
|
|
chomp;
|
|
if($_ =~ /^#/) {
|
|
next;
|
|
}
|
|
if(/^---:([^:]*):(.*)/) {
|
|
# whitelist file + word
|
|
my $word = lc($2);
|
|
$wl{"$1:$word"}=1;
|
|
}
|
|
elsif($_ =~ /^---(.+)/) {
|
|
# whitelist word
|
|
push @whitelist, $1;
|
|
}
|
|
elsif($_ =~ /^(.*)([:=])(.*)/) {
|
|
my ($bad, $sep, $better)=($1, $2, $3);
|
|
if($sep eq "=") {
|
|
$alt{$bad} = $better;
|
|
push @exact, $bad;
|
|
}
|
|
else {
|
|
$alt{lc($bad)} = $better;
|
|
push @w, $bad;
|
|
}
|
|
}
|
|
}
|
|
close(CONFIG);
|
|
|
|
# Build a single combined regex for case-insensitive words
|
|
my $re_ci;
|
|
if(@w) {
|
|
my $pat = join('|', map { quotemeta($_) } @w);
|
|
$re_ci = qr/\b($pat)\b/i;
|
|
}
|
|
|
|
# Build a single combined regex for case-sensitive (exact) words
|
|
my $re_cs;
|
|
if(@exact) {
|
|
my $pat = join('|', map { quotemeta($_) } @exact);
|
|
$re_cs = qr/\b($pat)\b/;
|
|
}
|
|
|
|
# Build a single combined regex for removing whitelisted content
|
|
my $re_wl;
|
|
my $pat = join('|', map { $_ } @whitelist);
|
|
$re_wl = qr/($pat)/;
|
|
|
|
my $errors = 0;
|
|
|
|
sub highlight {
|
|
my ($p, $w, $in, $f, $l, $lookup) = @_;
|
|
|
|
my $c = length($p)+1;
|
|
my $ch;
|
|
|
|
my $dir = dirname($f);
|
|
$ch = $dir . "/" . ":" . lc($w);
|
|
if($wl{$ch}) {
|
|
# whitelisted dirname + word
|
|
return;
|
|
}
|
|
my $updir = dirname($dir);
|
|
if($dir ne $updir) {
|
|
$ch = $updir . "/" . ":" . lc($w);
|
|
if($wl{$ch}) {
|
|
# whitelisted upper dirname + word
|
|
return;
|
|
}
|
|
}
|
|
$ch = $f . ":" . lc($w);
|
|
if($wl{$ch}) {
|
|
# whitelisted filename + word
|
|
return;
|
|
}
|
|
|
|
print STDERR "$f:$l:$c: error: found bad word \"$w\"\n";
|
|
printf STDERR " %4d | %s\n", $l, $in;
|
|
printf STDERR " | %*s^%s\n", length($p), " ",
|
|
"~" x (length($w)-1);
|
|
printf STDERR " maybe use \"%s\" instead?\n", $alt{$lookup};
|
|
$errors++;
|
|
}
|
|
|
|
sub document {
|
|
my ($f) = @_;
|
|
my @lines;
|
|
open(F, "<$f");
|
|
while(<F>) {
|
|
push @lines, $_;
|
|
}
|
|
close(F);
|
|
return @lines;
|
|
}
|
|
|
|
sub file {
|
|
my ($f) = @_;
|
|
my $l = 0;
|
|
|
|
my $skip_indented = 0;
|
|
my $source_code = 0;
|
|
if($f =~ /\.[ch]$/) {
|
|
$source_code = 1;
|
|
}
|
|
else {
|
|
# markdown
|
|
$skip_indented = 1;
|
|
}
|
|
|
|
my @lines;
|
|
if($source_code) {
|
|
@lines = sourcecode($f);
|
|
}
|
|
else {
|
|
@lines = document($f);
|
|
}
|
|
for my $in (@lines) {
|
|
$l++;
|
|
chomp $in;
|
|
if($skip_indented && $in =~ /^ /) {
|
|
next;
|
|
}
|
|
# remove the link part
|
|
$in =~ s/(\[.*\])\(.*\)/$1/g;
|
|
# remove whitelisted patterns (pre-compiled)
|
|
if($re_wl) {
|
|
$in =~ s/${re_wl}//ig;
|
|
}
|
|
# case-insensitive bad words
|
|
if($re_ci) {
|
|
if($in =~ /^(.*)$re_ci/i) {
|
|
highlight($1, $2, $in, $f, $l, lc($2));
|
|
}
|
|
}
|
|
# case-sensitive (exact) bad words
|
|
if($re_cs) {
|
|
if($in =~ /^(.*)$re_cs/) {
|
|
highlight($1, $2, $in, $f, $l, $2);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
my @filemasks = @ARGV;
|
|
open(my $git_ls_files, '-|', 'git', 'ls-files', '--', @filemasks) or die "Failed running git ls-files: $!";
|
|
my @files;
|
|
while(my $each = <$git_ls_files>) {
|
|
chomp $each;
|
|
push @files, $each;
|
|
}
|
|
close $git_ls_files;
|
|
|
|
my $onum = scalar(@files);
|
|
my $num;
|
|
for my $e (@files) {
|
|
#printf STDERR "Complete: %d%%\r", $num++ * 100 / $onum;
|
|
file($e);
|
|
}
|
|
|
|
exit $errors;
|