tests/data: support using native newlines on disk, drop .gitattributes

Data files no longer depend on mixed newline styles. Before this patch the harness still assumed data files to use LF newlines, ensured by `.gitattributes` and distributing sources with LF newlines. To allow using platform native newlines (CRLF on Windows typically), update the test harness to support data files with any newline style on disk. And delete `.gitattributes`. Fix to: - load original data files (from tests/data) so that their newline-style doesn't matter on the checked-out source repo, meaning it works when it's CRLF on Windows, just like any other file. (if a BOM slips in, it's caught by `spacecheck.pl` as binary content.) - do the same in `util.py` used by `smbserver.py` (for test 1451). - also fix `util.py` to use us-ascii encoding for data files, replacing utf-8. Also: - runtests: rework the stray CR checker to allow full CRLF data files, and keep warning for mixed newlines. Follow-up to 904e7ecb66 #19347 Closes #19398
2026-04-12 00:11:42 +08:00 · 2025-11-07 16:39:29 +01:00 · 2025-11-07 16:39:29 +01:00 · f477f3efc3
commit f477f3efc3
parent 8e321a53df
6 changed files with 54 additions and 29 deletions
--- a/tests/data/.gitattributes
+++ b/tests/data/.gitattributes
@ -1,5 +0,0 @@
-# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
-#
-# SPDX-License-Identifier: curl
-
-test* -crlf
--- a/tests/devtest.pl
+++ b/tests/devtest.pl
@ -176,7 +176,7 @@ while(@ARGV) {
    }
    elsif($ARGV[0] eq "preprocess") {
        shift @ARGV;
-        loadtest("${TESTDIR}/test${ARGV[0]}");
+        loadtest("${TESTDIR}/test${ARGV[0]}", 1);
        readtestkeywords();
        singletest_preprocess($ARGV[0]);
    }
--- a/tests/getpart.pm
+++ b/tests/getpart.pm
@ -217,7 +217,7 @@ sub partexists {
 # memoize('partexists', NORMALIZER => 'normalize_part');  # cache each result

 sub loadtest {
-    my ($file)=@_;
+    my ($file, $original)=@_;

    if(defined $xmlfile && $file eq $xmlfile) {
        # This test is already loaded
@ -228,7 +228,12 @@ sub loadtest {
    $xmlfile = "";

    if(open(my $xmlh, "<", "$file")) {
-        binmode $xmlh; # for crapage systems, use binary
+        if($original) {
+            binmode $xmlh, ':crlf'
+        }
+        else {
+            binmode $xmlh; # for crapage systems, use binary
+        }
        while(<$xmlh>) {
            push @xml, $_;
        }
@ -251,16 +256,44 @@ sub fulltest {
    return @xml;
 }

-sub checktest {
-    my $anyerr = 0;
+sub eol_detect {
+    my ($content) = @_;

-    for my $i (0 .. $#xml) {
-        if(index($xml[$i], "\r") >= 0) {
-            print STDERR "*** getpart.pm: $xmlfile:$i: 0x0d carriage return found. Use %CR macro instead.\n";
-            $anyerr = 1;
+    my $cr = () = $content =~ /\r/g;
+    my $lf = () = $content =~ /\n/g;
+
+    if($cr > 0 && $lf == 0) {
+        return "cr";
+    }
+    elsif($cr == 0 && $lf > 0) {
+        return "lf";
+    }
+    elsif($cr == 0 && $lf == 0) {
+        return "bin";
+    }
+    elsif($cr == $lf) {
+        return "crlf";
+    }
+
+    return "";
+}
+
+sub checktest {
+    my ($file) = @_;
+
+    if(open(my $xmlh, '<', $file)) {
+        binmode $xmlh; # we want the raw data to check original newlines
+        my $content = do { local $/; <$xmlh> };
+        close($xmlh);
+
+        my $eol = eol_detect($content);
+        if($eol eq '') {
+            print STDERR "*** getpart.pm: $xmlfile has mixed newlines. Replace significant carriage return with %CR macro, or convert to consistent newlines.\n";
+            return 1;
        }
    }
-    return $anyerr;
+
+    return 0;
 }

 # write the test to the given file
--- a/tests/runner.pm
+++ b/tests/runner.pm
@ -1149,12 +1149,9 @@ sub singletest_postcheck {
        }
    }

-    if($checktests) {
-        loadtest("${TESTDIR}/test${testnum}");  # load the raw original data
-        if(checktest()) {
-            logmsg " $testnum: postcheck FAILED: issue(s) found in test data\n";
-            return -1;
-        }
+    if($checktests && checktest("${TESTDIR}/test${testnum}")) {
+        logmsg " $testnum: postcheck FAILED: issue(s) found in test data\n";
+        return -1;
    }

    return 0;
@ -1181,7 +1178,7 @@ sub runner_test_preprocess {
    # ignore any error here--if there were one, it would have been
    # caught during the selection phase and this test would not be
    # running now
-    loadtest("${TESTDIR}/test${testnum}");
+    loadtest("${TESTDIR}/test${testnum}", 1);
    readtestkeywords();

    ###################################################################
--- a/tests/runtests.pl
+++ b/tests/runtests.pl
@ -1078,7 +1078,7 @@ sub singletest_shouldrun {
        $errorreturncode = 2;
    }

-    if(loadtest("${TESTDIR}/test${testnum}")) {
+    if(loadtest("${TESTDIR}/test${testnum}", 1)) {
        if($verbose) {
            # this is not a test
            logmsg "RUN: $testnum doesn't look like a test case\n";
@ -1191,7 +1191,7 @@ sub singletest_shouldrun {
        }
    }

-    if($why && $checktests && checktest()) {
+    if($why && $checktests && checktest("${TESTDIR}/test${testnum}")) {
        logmsg "Warning: issue(s) found in test data: ${TESTDIR}/test${testnum}\n";
    }

@ -1980,7 +1980,7 @@ sub singletest {
        ###################################################################
        # Load test file so CI registration can get the right data before the
        # runner is called
-        loadtest("${TESTDIR}/test${testnum}");
+        loadtest("${TESTDIR}/test${testnum}", 1);

        ###################################################################
        # Register the test case with the CI environment
@ -3281,7 +3281,7 @@ if(%skipped && !$short) {
 sub testnumdetails {
    my ($desc, $numlist) = @_;
    foreach my $testnum (split(' ', $numlist)) {
-        if(!loadtest("${TESTDIR}/test${testnum}")) {
+        if(!loadtest("${TESTDIR}/test${testnum}", 1)) {
            my @info_keywords = getpart("info", "keywords");
            my $testname = (getpart("client", "name"))[0];
            chomp $testname;
--- a/tests/util.py
+++ b/tests/util.py
@ -75,8 +75,8 @@ class TestData(object):

        log.debug("Parsing file %s", filename)

-        with open(filename, "rb") as f:
-            contents = f.read().decode("utf-8")
+        with open(filename, "r", encoding='us-ascii') as f:
+            contents = f.read()

        m = REPLY_DATA.search(contents)
        if not m:
@ -88,5 +88,5 @@ class TestData(object):

 if __name__ == '__main__':
    td = TestData("./data")
-    data = td.get_test_data(1)
+    data = td.get_test_data(1451)
    print(data)