Skip to content

Commit f4b46cf

Browse files
committed
Provide an 'auto' option for --line_terminator
If 'auto' is chosen, output CRLF line endings if more than 50% of the input is CRLF, otherwise LF. The other choices, CRLF or LF, behave as before. NB: the default changes with this commit: previously, we always converted a CRLF input to LF; now the default is 'auto'.
1 parent 6834307 commit f4b46cf

File tree

7 files changed

+118
-38
lines changed

7 files changed

+118
-38
lines changed

verible/common/formatting/basic-format-style-init.cc

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,12 @@ ABSL_FLAG(int, over_column_limit_penalty, 100,
3737
ABSL_FLAG(int, line_break_penalty, 2,
3838
"Penalty added to solution for each introduced line break.");
3939

40-
ABSL_FLAG(verible::LineTerminatorStyle, line_terminator,
41-
verible::LineTerminatorStyle::kLF, "Line terminator");
40+
// Command-line choice of the output line terminator. The accepted values are
// the names registered for LineTerminatorOptionStyle: "CRLF", "LF" and "auto".
ABSL_FLAG(verible::LineTerminatorOptionStyle, line_terminator,
          verible::LineTerminatorOptionStyle::kAuto,
          "Line terminator. "
          "The 'auto' option chooses the output depending on the observed "
          "input. The explicit choice LF or CRLF fixes the output line "
          "terminator.");
4246

4347
namespace verible {
4448
void InitializeFromFlags(BasicFormatStyle *style) {

verible/common/formatting/basic-format-style.cc

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,30 @@ std::string AbslUnparseFlag(const IndentationStyle &mode) {
4848
return stream.str();
4949
}
5050

51-
static const verible::EnumNameMap<LineTerminatorStyle> &
52-
LineTerminatorStyleStrings() {
53-
static const verible::EnumNameMap<LineTerminatorStyle>
54-
kLineTerminatorStyleStringMap({
55-
{"CRLF", LineTerminatorStyle::kCRLF},
56-
{"LF", LineTerminatorStyle::kLF},
51+
static const verible::EnumNameMap<LineTerminatorOptionStyle> &
52+
LineTerminatorOptionStyleStrings() {
53+
static const verible::EnumNameMap<LineTerminatorOptionStyle>
54+
kLineTerminatorOptionStyleStringMap({
55+
{"CRLF", LineTerminatorOptionStyle::kCRLF},
56+
{"LF", LineTerminatorOptionStyle::kLF},
57+
{"auto", LineTerminatorOptionStyle::kAuto},
5758
});
58-
return kLineTerminatorStyleStringMap;
59+
return kLineTerminatorOptionStyleStringMap;
60+
}
61+
62+
std::ostream &operator<<(std::ostream &stream,
63+
LineTerminatorOptionStyle style) {
64+
return LineTerminatorOptionStyleStrings().Unparse(style, stream);
65+
}
66+
67+
bool AbslParseFlag(std::string_view text, LineTerminatorOptionStyle *mode,
68+
std::string *error) {
69+
return LineTerminatorOptionStyleStrings().Parse(text, mode, error,
70+
"LineTerminatorOptionStyle");
71+
}
72+
73+
std::string AbslUnparseFlag(const LineTerminatorOptionStyle &mode) {
74+
return std::string{LineTerminatorOptionStyleStrings().EnumName(mode)};
5975
}
6076

6177
void EmitLineTerminator(LineTerminatorStyle style, std::ostream &stream) {
@@ -69,18 +85,4 @@ void EmitLineTerminator(LineTerminatorStyle style, std::ostream &stream) {
6985
}
7086
}
7187

72-
std::ostream &operator<<(std::ostream &stream, LineTerminatorStyle style) {
73-
return LineTerminatorStyleStrings().Unparse(style, stream);
74-
}
75-
76-
bool AbslParseFlag(std::string_view text, LineTerminatorStyle *mode,
77-
std::string *error) {
78-
return LineTerminatorStyleStrings().Parse(text, mode, error,
79-
"LineTerminatorStyle");
80-
}
81-
82-
std::string AbslUnparseFlag(const LineTerminatorStyle &mode) {
83-
return std::string{LineTerminatorStyleStrings().EnumName(mode)};
84-
}
85-
8688
} // namespace verible

verible/common/formatting/basic-format-style.h

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,20 @@
2121

2222
namespace verible {
2323

24-
enum class LineTerminatorStyle {
25-
// Line Feed `\n` (UNIX Style)
24+
// The option style allows for 'auto' which then is converted to the observed
25+
// style from the input (into a LineTerminatorStyle).
26+
enum class LineTerminatorOptionStyle {
2627
kLF,
27-
// Carriage return + Line Feed `\r\n` (DOS Style)
2828
kCRLF,
29+
kAuto,
2930
};
3031

31-
void EmitLineTerminator(LineTerminatorStyle style, std::ostream &stream);
32+
std::ostream &operator<<(std::ostream &stream, LineTerminatorOptionStyle style);
3233

33-
std::ostream &operator<<(std::ostream &stream, LineTerminatorStyle style);
34+
bool AbslParseFlag(std::string_view, LineTerminatorOptionStyle *,
35+
std::string *);
3436

35-
bool AbslParseFlag(std::string_view, LineTerminatorStyle *, std::string *);
36-
37-
std::string AbslUnparseFlag(const LineTerminatorStyle &);
37+
std::string AbslUnparseFlag(const LineTerminatorOptionStyle &);
3838

3939
// Style configuration common to all languages.
4040
struct BasicFormatStyle {
@@ -57,8 +57,9 @@ struct BasicFormatStyle {
5757
// Penalty added to solution for each introduced line break.
5858
int line_break_penalty = 2;
5959

60-
// Line terminator character sequence
61-
LineTerminatorStyle line_terminator = LineTerminatorStyle::kLF;
60+
// Line terminator character sequence. Consistent LF for unit tests, but
61+
// note, the command line flag sets this to 'auto'.
62+
LineTerminatorOptionStyle line_terminator = LineTerminatorOptionStyle::kLF;
6263

6364
// -- Note: when adding new fields, add them in basic_format_style_init.cc
6465
};
@@ -81,6 +82,16 @@ bool AbslParseFlag(std::string_view, IndentationStyle *, std::string *);
8182

8283
std::string AbslUnparseFlag(const IndentationStyle &);
8384

85+
// Concrete terminator.
86+
enum class LineTerminatorStyle {
87+
// Line Feed `\n` (UNIX Style)
88+
kLF,
89+
// Carriage return + Line Feed `\r\n` (DOS Style)
90+
kCRLF,
91+
};
92+
93+
void EmitLineTerminator(LineTerminatorStyle style, std::ostream &stream);
94+
8495
} // namespace verible
8596

8697
#endif // VERIBLE_COMMON_FORMATTING_BASIC_FORMAT_STYLE_H_

verible/verilog/formatting/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ cc_library(
138138
":format-style",
139139
":token-annotator",
140140
":tree-unwrapper",
141+
"//verible/common/formatting:basic-format-style",
141142
"//verible/common/formatting:format-token",
142143
"//verible/common/formatting:layout-optimizer",
143144
"//verible/common/formatting:line-wrap-searcher",

verible/verilog/formatting/formatter.cc

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include <algorithm>
1818
#include <cstddef>
19+
#include <cstdint>
1920
#include <cstdlib>
2021
#include <functional>
2122
#include <iostream>
@@ -29,6 +30,7 @@
2930
#include "absl/status/status.h"
3031
#include "absl/status/statusor.h"
3132
#include "absl/strings/str_cat.h"
33+
#include "verible/common/formatting/basic-format-style.h"
3234
#include "verible/common/formatting/format-token.h"
3335
#include "verible/common/formatting/layout-optimizer.h"
3436
#include "verible/common/formatting/line-wrap-searcher.h"
@@ -970,6 +972,50 @@ Status Formatter::Format(const ExecutionControl &control) {
970972
return absl::OkStatus();
971973
}
972974

975+
// This might be better placed in verible/common/text.
976+
// Tally of the line endings observed in a piece of text.
struct TerminatorCount {
  int64_t line_count = 0;  // every '\n' seen, whether part of LF or CRLF
  int64_t crlf_count = 0;  // those '\n' that are directly preceded by '\r'
};

// Counts the line terminators in `text`, starting at the beginning and
// inspecting at most `count_at_most` lines. A non-positive `count_at_most`
// inspects nothing and yields an all-zero result.
static TerminatorCount CountLineTerminators(std::string_view text,
                                            int64_t count_at_most) {
  TerminatorCount result;
  size_t pos = 0;
  // The limit is tested before each search so that it is honored even when
  // it is zero or negative.
  while (result.line_count < count_at_most &&
         (pos = text.find('\n', pos)) != std::string_view::npos) {
    ++result.line_count;
    // `pos > 0` guards the look-behind when the text starts with '\n'.
    if (pos > 0 && text[pos - 1] == '\r') {
      ++result.crlf_count;
    }
    ++pos;  // resume the search just past this '\n'
  }
  return result;
}
998+
999+
// From options, extraact the line terminator style. If 'auto' was chosen,
1000+
// attempt to determine from text.
1001+
static verible::LineTerminatorStyle DetermineOutputLineTerminator(
1002+
verible::LineTerminatorOptionStyle from_options, std::string_view text) {
1003+
static constexpr int64_t kCountAtMost = 100; // sufficient stats
1004+
switch (from_options) {
1005+
case verible::LineTerminatorOptionStyle::kCRLF:
1006+
return verible::LineTerminatorStyle::kCRLF;
1007+
case verible::LineTerminatorOptionStyle::kLF:
1008+
return verible::LineTerminatorStyle::kLF;
1009+
case verible::LineTerminatorOptionStyle::kAuto: {
1010+
const TerminatorCount counts = CountLineTerminators(text, kCountAtMost);
1011+
return (counts.crlf_count >= counts.line_count / 2)
1012+
? verible::LineTerminatorStyle::kCRLF
1013+
: verible::LineTerminatorStyle::kLF;
1014+
}
1015+
}
1016+
return verible::LineTerminatorStyle::kLF;
1017+
}
1018+
9731019
void Formatter::Emit(bool include_disabled, std::ostream &stream) const {
9741020
const std::string_view full_text(text_structure_.Contents());
9751021
std::function<bool(const verible::TokenInfo &)> include_token_p;
@@ -981,6 +1027,8 @@ void Formatter::Emit(bool include_disabled, std::ostream &stream) const {
9811027
};
9821028
}
9831029

1030+
const verible::LineTerminatorStyle out_terminator =
1031+
DetermineOutputLineTerminator(style_.line_terminator, full_text);
9841032
int position = 0; // tracks with the position in the original full_text
9851033
for (const verible::FormattedExcerpt &line : formatted_lines_) {
9861034
// TODO(fangism): The handling of preserved spaces before tokens is messy:
@@ -993,7 +1041,7 @@ void Formatter::Emit(bool include_disabled, std::ostream &stream) const {
9931041
full_text.substr(position, front_offset - position));
9941042
FormatWhitespaceWithDisabledByteRanges(full_text, leading_whitespace,
9951043
disabled_ranges_, include_disabled,
996-
stream, style_.line_terminator);
1044+
stream, out_terminator);
9971045

9981046
// When front of first token is format-disabled, the previous call will
9991047
// already cover the space up to the front token, in which case,
@@ -1010,7 +1058,7 @@ void Formatter::Emit(bool include_disabled, std::ostream &stream) const {
10101058
const std::string_view trailing_whitespace(full_text.substr(position));
10111059
FormatWhitespaceWithDisabledByteRanges(full_text, trailing_whitespace,
10121060
disabled_ranges_, include_disabled,
1013-
stream, style_.line_terminator);
1061+
stream, out_terminator);
10141062
}
10151063

10161064
} // namespace formatter

verible/verilog/tools/formatter/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ To pipe from stdin, use '-' as <file>.
2323
default: 2;
2424
--line_break_penalty (Penalty added to solution for each introduced line
2525
break.); default: 2;
26+
--line_terminator (Line terminator. The 'auto' option chooses the output
27+
depending on the observed input. The explicit choice LF or CRLF fixes the
28+
output line terminator.); default: auto;
2629
--over_column_limit_penalty (For penalty minimization, this represents the
2730
baseline penalty value of exceeding the column limit. Additional penalty
2831
of 1 is incurred for each character over this limit); default: 100;

verible/verilog/tools/formatter/format_line_terminator_test.sh

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,27 @@ done
3737
# Test any combination of input line terminators and output line terminators.
3838
# Test both inline formatting and standard output
3939
for original_newline in LF CRLF; do
40+
PROPER_INPUT_FILE="${MY_INPUT_FILE}$original_newline"
41+
42+
# For 'auto', the line terminators of the output will be the same as input.
43+
cp $PROPER_INPUT_FILE ${MY_OUTPUT_FILE}
44+
${formatter} --line_terminator=auto ${PROPER_INPUT_FILE} > ${MY_OUTPUT_FILE}
45+
cmp ${MY_OUTPUT_FILE} $PROPER_INPUT_FILE || exit 1
46+
47+
cp $PROPER_INPUT_FILE ${MY_OUTPUT_FILE}
48+
${formatter} --line_terminator=auto --inplace ${MY_OUTPUT_FILE}
49+
cmp ${MY_OUTPUT_FILE} $PROPER_INPUT_FILE || exit 2
50+
51+
# With an explicit target newline, we expect that particular one.
4052
for target_newline in LF CRLF; do
41-
PROPER_INPUT_FILE="${MY_INPUT_FILE}$original_newline"
4253
PROPER_EXPECT_FILE="${MY_EXPECT_FILE}$target_newline"
4354

4455
${formatter} --line_terminator=$target_newline $PROPER_INPUT_FILE > ${MY_OUTPUT_FILE}
45-
cmp ${MY_OUTPUT_FILE} $PROPER_EXPECT_FILE || exit 1
56+
cmp ${MY_OUTPUT_FILE} $PROPER_EXPECT_FILE || exit 3
4657

4758
cp $PROPER_INPUT_FILE ${MY_OUTPUT_FILE}
4859
${formatter} --line_terminator=$target_newline --inplace ${MY_OUTPUT_FILE}
49-
cmp ${MY_OUTPUT_FILE} $PROPER_EXPECT_FILE || exit 2
60+
cmp ${MY_OUTPUT_FILE} $PROPER_EXPECT_FILE || exit 4
5061
done
5162
done
5263

0 commit comments

Comments
 (0)