#!/usr/bin/env perl
#***************************************************************************
#                                  _   _ ____  _
#  Project                     ___| | | |  _ \| |
#                             / __| | | | |_) | |
#                            | (__| |_| |  _ <| |___
#                             \___|\___/|_| \_\_____|
#
# Copyright (C) Viktor Szakats
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
###########################################################################

# Checks every file listed by 'git ls-files' for whitespace, end-of-line,
# line length and non-ASCII issues. Prints one "<file>: <issue>" line per
# finding and exits with status 1 if any file had an issue.

use strict;
use warnings;

# All lists below hold regular expressions matched against the file path.

# Files allowed to contain tabs.
my @tabs = (
    '^m4/zz40-xc-ovr\.m4$',
    'Makefile\.(am|example)$',
    '\.sln$',
    '^tests/data/data1706-stdout\.txt',
    '^tests/data/test',
);

# Files that must use CRLF line endings; all others must use LF.
my @need_crlf = (
    '\.(bat|sln)$',
);

# Files allowed to contain consecutive empty lines.
my @double_empty_lines = (
    '^RELEASE-NOTES$',
    '^lib/.+\.(c|h)$',
    '^projects/OS400/',
    '^projects/vms/',
    '^tests/data/test',
    '\.(m4|py)$',
);

# Files exempt from the maximum line length check.
my @longline = (
    '\.github/workflows/windows\.yml$',
    '^renovate\.json$',
    '^docs/DISTROS\.md$',
    '^projects/Windows/tmpl/.+\.vcxproj$',
    '^tests/certs/srp-verifier-',
    '^tests/data/test',
);

# Byte sequences (written as regex escapes) tolerated in any file outside
# tests/data.
my @non_ascii_allowed = (
    # UTF-8 for https://codepoints.net/U+00F6
    # LATIN SMALL LETTER O WITH DIAERESIS
    '\xC3\xB6',
);

# Alternation of the complete sequences. Joining with '|' (instead of
# building a character class) ensures only whole sequences are accepted,
# not their individual bytes, and that adding more entries does not pull
# separator characters into the pattern.
my $non_ascii_allowed = join('|', @non_ascii_allowed);

# Files allowed to contain arbitrary non-ASCII bytes.
my @non_ascii = (
    '^\.github/scripts/pyspelling\.words$',
    '^\.mailmap$',
    '^RELEASE-NOTES$',
    '^docs/BINDINGS\.md$',
    '^docs/THANKS$',
    '^docs/THANKS-filter$',
);

# Return 1 if $filename matches any of the regexes in @masklist, else 0.
sub fn_match {
    my ($filename, @masklist) = @_;

    foreach my $mask (@masklist) {
        if($filename =~ $mask) {
            return 1;
        }
    }
    return 0;
}

# Classify the line endings used in $content by counting CR and LF bytes.
# Returns 'cr', 'lf' or 'crlf', 'bin' when there is no CR and no LF at
# all, or '' when the CR and LF counts do not agree (mixed EOLs).
sub eol_detect {
    my ($content) = @_;

    my $cr = () = $content =~ /\r/g;
    my $lf = () = $content =~ /\n/g;

    if($cr > 0 && $lf == 0) {
        return 'cr';
    }
    elsif($cr == 0 && $lf > 0) {
        return 'lf';
    }
    elsif($cr == 0 && $lf == 0) {
        return 'bin';
    }
    elsif($cr == $lf) {
        return 'crlf';
    }

    return '';
}

my $issues = 0;

open(my $git_ls_files, '-|', 'git', 'ls-files')
    or die "Failed running git ls-files: $!";

while(my $filename = <$git_ls_files>) {
    chomp $filename;

    # All checks below are byte-oriented, so read the file without any
    # I/O layer translating line endings or decoding characters.
    open(my $fh, '<:raw', $filename) or die "Cannot open '$filename': $!";
    my $content = do { local $/; <$fh> };
    close $fh;

    my @err = ();

    if(!fn_match($filename, @tabs) &&
       $content =~ /\t/) {
        push @err, 'content: has tab';
    }

    my $eol = eol_detect($content);

    if($eol eq '') {
        push @err, 'content: has mixed EOL types';
    }

    if($eol ne 'crlf' &&
       fn_match($filename, @need_crlf)) {
        push @err, 'content: must use CRLF EOL for this file type';
    }

    if($eol ne 'lf' && $content ne '' &&
       !fn_match($filename, @need_crlf)) {
        push @err, 'content: must use LF EOL for this file type';
    }

    if($content =~ /[ \t]\n/) {
        my $line = 0;
        for my $l (split(/\n/, $content)) {
            $line++;
            if($l =~ /[ \t]$/) {
                push @err, "line $line: trailing whitespace";
            }
        }
    }

    if($content ne '' &&
       $content !~ /\n\z/) {
        push @err, 'content: has no EOL at EOF';
    }

    if($content =~ /\n\n\z/ ||
       $content =~ /\r\n\r\n\z/) {
        push @err, 'content: has multiple EOL at EOF';
    }

    if((!fn_match($filename, @double_empty_lines) &&
        ($content =~ /\n\n\n/ ||
         $content =~ /\r\n\r\n\r\n/)) ||
       $content =~ />\n\n\n+[<#]/) {
        my $line = 0;
        my $blank = 0;
        for my $l (split(/\n/, $content)) {
            $line++;
            # Splitting on LF leaves a lone CR behind on the empty lines
            # of CRLF files; accept it so those files are reported too.
            if($l =~ /^\r?$/) {
                if($blank) {
                    my $lineno = sprintf('duplicate empty line @ line %d',
                                         $line);
                    push @err, $lineno;
                }
                $blank = 1;
            }
            else {
                $blank = 0;
            }
        }
    }

    if(!fn_match($filename, @longline)) {
        my $line = 0;
        my $max = 192;
        for my $l (split(/\n/, $content)) {
            $line++;
            if(length($l) > $max) {
                push @err, sprintf('line %d: long (%d > %d) line',
                                   $line, length($l), $max);
            }
        }
    }

    my $line = 0;
    my $max = 79;
    for my $l (split(/\n/, $content)) {
        $line++;
        if($l =~ /( {$max,})/) {
            push @err, sprintf('line %d: repeat spaces (%d > %d)',
                               $line, length($1), $max);
        }
    }

    # The three scans below repeatedly cut the already searched part off
    # $search and keep the number of lines consumed so far in $linepos,
    # so that each hit can be reported with its absolute line number.

    my $search = $content;
    my $linepos = 0;
    while($search =~ /[^ ] "\n *" [^ ]/) {
        my $part = substr($search, 0, $+[0]);
        $search = substr($search, $+[0]);
        my $line = ($part =~ tr/\n//);
        push @err, sprintf('line %d: double spaces in folded string',
                           $linepos + $line);
        $linepos += $line;
    }

    $search = $content;
    $linepos = 0;
    while($search =~ /\n\n *}\n/) {
        # Leave the final LF of the match out of the count, so the
        # reported line is the empty one in front of the '}'.
        my $part = substr($search, 0, $+[0] - 1);
        $search = substr($search, $+[0]);
        my $line = ($part =~ tr/\n//);
        push @err, sprintf("line %d: '}' preceded by empty line",
                           $linepos + $line);
        $linepos += $line + 1;
    }

    $search = $content;
    $linepos = 0;
    while($search =~ /\n\{\n\n/) {
        my $part = substr($search, 0, $+[0]);
        $search = substr($search, $+[0]);
        my $line = ($part =~ tr/\n//);
        push @err, sprintf("line %d: top-level '{' followed by empty line",
                           $linepos + $line);
        $linepos += $line;
    }

    if($content =~ /([\x00-\x08\x0b\x0c\x0e-\x1f\x7f])/) {
        push @err, 'content: has binary contents';
    }

    if($filename !~ /tests\/data/) {
        # the tests have no allowed UTF bytes
        $content =~ s/(?:$non_ascii_allowed)//g;
    }

    if(!fn_match($filename, @non_ascii) &&
       $content =~ /[\x80-\xff]/) {
        my $line = 0;
        for my $l (split(/\n/, $content)) {
            $line++;
            if($l =~ /([\x80-\xff]+)/) {
                # Report the offending bytes of this very line, both raw
                # and as space separated hex values.
                my $non = $1;
                my $hex = join(' ', map { sprintf('%02x', ord($_)) }
                                    split(//, $non));
                push @err, "line $line: has non-ASCII: '$non' ($hex)";
            }
        }
    }

    if(@err) {
        $issues++;
        foreach my $err (@err) {
            print "$filename: $err\n";
        }
    }
}

# For a piped open, close() waits for the command and returns false if it
# could not be run or exited with a non-zero status. Without this check a
# failing 'git ls-files' would look like a clean run over zero files.
if(!close($git_ls_files)) {
    die 'Failed running git ls-files: ' .
        ($! ? $! : 'exit status ' . ($? >> 8));
}

if($issues) {
    exit 1;
}