curl-curl/scripts/spacecheck.pl
Viktor Szakats 62d77b12fc
spacecheck: check long lines and repeat spaces, fix fallouts
Verify that lines are not longer than 192 characters. Also verify that lines
have fewer than 79 repeat spaces (and fix one fallout).

To improve readability by avoiding long lines and to prevent adding
overly long lines with text that may go unnoticed in an editor or diff
viewer.

In addition to pre-existing line length limits: 79 for C, 132 for CMake
sources.

Also:
- spacecheck: fix/harden allowlist regexes.
- spacecheck: tidy-up quotes and simplify escaping.
- spacecheck: allow folding strings with repeat spaces.
- GHA: fix a suppressed shellcheck warning.
- GHA/macos: simplify by dropping brew bundle.
- test1119.pl: precompile a regex.
- FAQ.md: delete very long link to a Windows 7/2008 support article
  that's lost its relevance.

Closes #21087
2026-03-25 11:02:08 +01:00

269 lines
6.7 KiB
Perl
Executable File

#!/usr/bin/env perl
#***************************************************************************
# _ _ ____ _
# Project ___| | | | _ \| |
# / __| | | | |_) | |
# | (__| |_| | _ <| |___
# \___|\___/|_| \_\_____|
#
# Copyright (C) Viktor Szakats
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
###########################################################################
use strict;
use warnings;
# Allowlists. Every entry is a regular expression that is matched against
# the file names printed by 'git ls-files' (see fn_match).

# Files that are allowed to contain tab characters.
my @tabs = (
    '^m4/zz40-xc-ovr\.m4$',
    'Makefile\.(am|example)$',
    '\.sln$',
    '^tests/data/data1706-stdout\.txt',
    '^tests/data/test',
);

# Files that must use CRLF line endings. Every other non-empty file must
# use LF.
my @need_crlf = (
    '\.(bat|sln)$',
);

# Files exempt from the consecutive-empty-lines check.
my @double_empty_lines = (
    '^RELEASE-NOTES$',
    '^lib/.+\.(c|h)$',
    '^projects/OS400/',
    '^projects/vms/',
    '^tests/data/test',
    '\.(m4|py)$',
);

# Files exempt from the maximum line length check.
my @longline = (
    '\.github/workflows/windows\.yml$',
    '^renovate\.json$',
    '^docs/DISTROS\.md$',
    '^projects/Windows/tmpl/.+\.vcxproj$',
    '^tests/certs/srp-verifier-',
    '^tests/data/test',
);

# Bytes tolerated by the non-ASCII check, written as regex escapes. They are
# single-quoted on purpose: the escapes are interpreted by the regex engine
# when the string is interpolated into a pattern, not by Perl here.
my @non_ascii_allowed = (
    '\xC3\xB6',  # UTF-8 for https://codepoints.net/U+00F6 LATIN SMALL LETTER O WITH DIAERESIS
);

# This string is interpolated into a regex character class ([...]), so the
# entries are joined without a separator. A ', ' separator would add comma
# and space to the class as soon as a second entry is listed.
my $non_ascii_allowed = join('', @non_ascii_allowed);

# Files exempt from the non-ASCII check.
my @non_ascii = (
    '^\.github/scripts/pyspelling\.words$',
    '^\.mailmap$',
    '^RELEASE-NOTES$',
    '^docs/BINDINGS\.md$',
    '^docs/THANKS$',
    '^docs/THANKS-filter$',
);
# Tell whether a file name matches any pattern of a list.
#
# Arguments: the file name, followed by the patterns (regular expressions
# given as strings). Patterns are tried in order and the search stops at the
# first hit.
#
# Returns 1 on a match, 0 otherwise (including for an empty pattern list).
sub fn_match {
    my ($filename, @patterns) = @_;

    for my $pattern (@patterns) {
        return 1 if $filename =~ $pattern;
    }

    return 0;
}
# Classify the line ending style used by a file's content.
#
# Argument: the complete file content as one string.
#
# Returns:
#   'bin'   the content has neither CR nor LF (this includes empty content)
#   'cr'    only CR is used
#   'lf'    only LF is used
#   'crlf'  every CR and every LF is part of a CR LF pair
#   ''      anything else, i.e. mixed line endings
sub eol_detect {
    my ($content) = @_;

    my $cr = () = $content =~ /\r/g;
    my $lf = () = $content =~ /\n/g;

    if($cr == 0 && $lf == 0) {
        return 'bin';
    }
    if($lf == 0) {
        return 'cr';
    }
    if($cr == 0) {
        return 'lf';
    }

    # Equal CR and LF counts alone do not prove CRLF: "a\rb\n" has one of
    # each but no pair. Count the actual pairs and require that they account
    # for every CR and every LF.
    my $crlf = () = $content =~ /\r\n/g;
    if($crlf == $cr && $crlf == $lf) {
        return 'crlf';
    }

    return '';
}
# Main: run every check on each file tracked by git. Findings are printed as
# "<filename>: <message>" and the script exits with status 1 when at least
# one file has findings.
my $issues = 0;

open(my $git_ls_files, '-|', 'git', 'ls-files') or die "Failed running git ls-files: $!";
while(my $filename = <$git_ls_files>) {
    chomp $filename;

    # Slurp the file; most checks look at the content as a whole.
    open(my $fh, '<', $filename) or die "Cannot open '$filename': $!";
    my $content = do { local $/; <$fh> };
    close $fh;

    my @err = ();

    # LF-split view shared by the per-line checks that run before $content
    # is modified. In CRLF files every element keeps its trailing CR.
    my @lines = split(/\n/, $content);

    if(!fn_match($filename, @tabs) &&
       $content =~ /\t/) {
        push @err, 'content: has tab';
    }

    my $eol = eol_detect($content);

    if($eol eq '') {
        push @err, 'content: has mixed EOL types';
    }

    if($eol ne 'crlf' &&
       fn_match($filename, @need_crlf)) {
        push @err, 'content: must use CRLF EOL for this file type';
    }

    if($eol ne 'lf' && $content ne '' &&
       !fn_match($filename, @need_crlf)) {
        push @err, 'content: must use LF EOL for this file type';
    }

    # Cheap whole-content test first, per-line scan only when needed.
    if($content =~ /[ \t]\n/) {
        my $line = 0;
        for my $l (@lines) {
            $line++;
            if($l =~ /[ \t]$/) {
                push @err, "line $line: trailing whitespace";
            }
        }
    }

    if($content ne '' &&
       $content !~ /\n\z/) {
        push @err, 'content: has no EOL at EOF';
    }

    if($content =~ /\n\n\z/ ||
       $content =~ /\r\n\r\n\z/) {
        push @err, 'content: has multiple EOL at EOF';
    }

    # Consecutive empty lines. Allowlisted files are still scanned when an
    # empty-line run sits between a line ending in '>' and one starting with
    # '<' or '#'.
    if((!fn_match($filename, @double_empty_lines) &&
        ($content =~ /\n\n\n/ ||
         $content =~ /\r\n\r\n\r\n/)) ||
       $content =~ />\n\n\n+[<#]/) {
        my $line = 0;
        my $blank = 0;
        for my $l (@lines) {
            $line++;
            # Lines come from an LF split, so an empty line of a CRLF file
            # is a lone CR. Accept it, otherwise CRLF files trigger the
            # scan above but never produce a finding.
            if($l =~ /^\r?$/) {
                if($blank) {
                    my $lineno = sprintf('duplicate empty line @ line %d', $line);
                    push @err, $lineno;
                }
                $blank = 1;
            }
            else {
                $blank = 0;
            }
        }
    }

    if(!fn_match($filename, @longline)) {
        my $line = 0;
        my $max = 192;
        for my $l (@lines) {
            $line++;
            if(length($l) > $max) {
                push @err, sprintf('line %d: long (%d > %d) line', $line, length($l), $max);
            }
        }
    }

    # Runs of $max or more spaces. The pattern fires at exactly $max, so the
    # message uses '>=' ('>' printed the false statement "79 > 79").
    my $line = 0;
    my $max = 79;
    for my $l (@lines) {
        $line++;
        if($l =~ /( {$max,})/) {
            push @err, sprintf('line %d: repeat spaces (%d >= %d)', $line, length($1), $max);
        }
    }

    # The three scans below consume $search match by match. $linepos holds
    # the number of LFs consumed so far, which turns the LF count of the
    # current chunk into a line number of the file.

    # A string folded over two lines with a space on both sides of the fold.
    my $search = $content;
    my $linepos = 0;
    while($search =~ /[^ ] "\n *" [^ ]/) {
        my $part = substr($search, 0, $+[0]);
        $search = substr($search, $+[0]);
        my $line = ($part =~ tr/\n//);
        push @err, sprintf('line %d: double spaces in folded string', $linepos + $line);
        $linepos += $line;
    }

    # '}' preceded by an empty line. The LF ending the '}' line is only
    # looked ahead at, not consumed, so it can also start the next match
    # (as in "}\n\n}\n\n}\n"). Consuming it left the second occurrence
    # unreported.
    $search = $content;
    $linepos = 0;
    while($search =~ /\n\n *}(?=\n)/) {
        my $part = substr($search, 0, $+[0]);
        $search = substr($search, $+[0]);
        my $line = ($part =~ tr/\n//);
        push @err, sprintf("line %d: '}' preceded by empty line", $linepos + $line);
        $linepos += $line;
    }

    # '{' alone at the start of a line, followed by an empty line.
    $search = $content;
    $linepos = 0;
    while($search =~ /\n\{\n\n/) {
        my $part = substr($search, 0, $+[0]);
        $search = substr($search, $+[0]);
        my $line = ($part =~ tr/\n//);
        push @err, sprintf("line %d: top-level '{' followed by empty line", $linepos + $line);
        $linepos += $line;
    }

    # Control characters other than TAB, LF and CR.
    if($content =~ /([\x00-\x08\x0b\x0c\x0e-\x1f\x7f])/) {
        push @err, 'content: has binary contents';
    }

    # Drop the tolerated bytes before looking for non-ASCII. This is a
    # character class, so the bytes are removed individually. Only LF-free
    # bytes are removed, so line numbers stay valid.
    if($filename !~ /tests\/data/) {
        # the tests have no allowed UTF bytes
        $content =~ s/[$non_ascii_allowed]//g;
    }

    if(!fn_match($filename, @non_ascii) &&
       $content =~ /[\x80-\xff]/) {
        my $line = 0;
        for my $l (split(/\n/, $content)) {
            $line++;
            # Report the bytes found on this very line. Reusing the first
            # match of the whole file mislabelled every later line.
            if($l =~ /([\x80-\xff]+)/) {
                my $non = $1;
                my $hex = join(' ', map { sprintf('%02x', ord($_)) } split(//, $non));
                push @err, "line $line: has non-ASCII: '$non' ($hex)";
            }
        }
    }

    if(@err) {
        $issues++;
        foreach my $err (@err) {
            print "$filename: $err\n";
        }
    }
}

# close() on a piped handle also reports a non-zero exit of the command
# ($! is 0 in that case and the status is in $?). Without this check a
# failing 'git ls-files' looks like a clean run with nothing to report.
close($git_ls_files)
    or die 'Failed running git ls-files: ' .
           ($! ? $! : 'exit status ' . ($? >> 8)) . "\n";

if($issues) {
    exit 1;
}