Skip to content

Commit e730cd0

Browse files
committed
[skip actions] [review_short_line] 2025-10-26T08:46:03+02:00
1 parent 32a29fb commit e730cd0

File tree

2 files changed

+46
-18
lines changed

2 files changed

+46
-18
lines changed

review.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,6 @@ set -x
88

99
find data -type f -exec chmod -x {} +
1010

11-
.venv/bin/python review_data.py meta data >review.$(date +%Y%m%d_%H%M%S).$(git rev-parse HEAD).$(git status --porcelain | grep -v '??' | wc -l).txt
11+
.venv/bin/python review_data.py meta data --short_line >review.$(date +%Y%m%d_%H%M%S).$(git rev-parse HEAD).$(git status --porcelain | grep -v '??' | wc -l).txt
1212

1313
.venv/bin/python -m benchmark --scanner credsweeper --load .ci/empty_report.json | tee .ci/benchmark.txt

review_data.py

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
EXIT_SUCCESS = 0
2525
EXIT_FAILURE = 1
2626

27+
HUNK_SIZE = 80
28+
2729

2830
@functools.cache
2931
def get_excluding_extensions() -> set[str]:
@@ -39,7 +41,14 @@ def read_cache(path) -> list[str]:
3941
return f.read().replace("\r\n", '\n').replace('\r', '\n').split('\n')
4042

4143

42-
def read_data(path, line_start, line_end, value_start, value_end, ground_truth, creds: List[MetaCred]):
44+
def read_data(path: str,
45+
line_start: int,
46+
line_end: int,
47+
value_start: int,
48+
value_end: int,
49+
ground_truth: str,
50+
short_line: bool,
51+
creds: List[MetaCred]):
4352
lines = read_cache(path)
4453
if line_start == line_end:
4554
data_line = lines[line_start - 1]
@@ -62,6 +71,7 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
6271
line_found_in_cred = False
6372
correct_value_position = False
6473
if creds:
74+
# only applies when the review is run with a credsweeper report
6575
for cred in creds:
6676
if cred.path == path:
6777
if line_start == cred.line_start and line_end == cred.line_start:
@@ -93,19 +103,26 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
93103
line_found_in_cred = True
94104
correct_value_position = True
95105

96-
if 0 <= value_start and 0 <= value_end:
97-
line = data_line[:value_start] \
106+
text_start = value_start - HUNK_SIZE \
107+
if short_line and 0 < value_start - HUNK_SIZE else 0
108+
text_end = multiline_end_offset + value_end + HUNK_SIZE \
109+
if short_line and 0 <= value_end and len(data_line) > multiline_end_offset + value_end + HUNK_SIZE \
110+
else len(data_line)
111+
if 0 <= value_start <= value_end or line_start < line_end:
112+
line = data_line[text_start:value_start] \
98113
+ Back.LIGHTYELLOW_EX \
99114
+ data_line[value_start:value_end + multiline_end_offset] \
100115
+ Style.RESET_ALL \
101116
+ fore_style \
102-
+ data_line[value_end + multiline_end_offset:]
103-
elif value_start >= 0 > value_end:
104-
line = data_line[:value_start] \
117+
+ data_line[value_end + multiline_end_offset:text_end]
118+
elif value_end < 0 <= value_start:
119+
line = data_line[text_start:value_start] \
105120
+ Style.BRIGHT \
106121
+ data_line[value_start:]
107122
else:
108-
line = data_line
123+
line = data_line[:HUNK_SIZE]
124+
if line_start < line_end:
125+
line.replace('\n', Style.RESET_ALL + '\n' + fore_style + Back.LIGHTYELLOW_EX)
109126
print(f"{line_start}:{Style.RESET_ALL}{fore_style}{line}{Style.RESET_ALL}", flush=True)
110127
if not correct_value_position:
111128
print("Possible wrong value markup", flush=True)
@@ -115,7 +132,7 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
115132
test_line = data_line.lower()
116133
if not any(
117134
x in test_line for x in
118-
["api", "pass", "secret", "pw", "key", "credential", "token", "auth", "nonce", "salt", "cert"]
135+
["api", "pass", "secret", "pw", "key", "credential", "token", "auth", "nonce", "salt"]
119136
):
120137
repo_id = path.split('/')[1]
121138
subprocess.check_call(
@@ -128,10 +145,12 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
128145

129146
def review(meta_dir: str,
130147
data_dir: str,
148+
short_line: bool,
131149
check_only: bool,
132150
data_filter: dict,
151+
category: Optional[str] = None,
133152
load_json: Optional[str] = None,
134-
category: Optional[str] = None) -> int:
153+
) -> int:
135154
errors = 0
136155
duplicates = 0
137156
if not os.path.exists(meta_dir):
@@ -163,13 +182,15 @@ def review(meta_dir: str,
163182
if not check_only:
164183
print(str(row), flush=True)
165184
try:
166-
read_data(row.FilePath,
167-
row.LineStart,
168-
row.LineEnd,
169-
row.ValueStart,
170-
row.ValueEnd,
171-
row.GroundTruth,
172-
creds)
185+
read_data(path=row.FilePath,
186+
line_start=row.LineStart,
187+
line_end=row.LineEnd,
188+
value_start=row.ValueStart,
189+
value_end=row.ValueEnd,
190+
ground_truth=row.GroundTruth,
191+
short_line=short_line,
192+
creds=creds,
193+
)
173194
except Exception as exc:
174195
print(f"Failure {row}", exc, flush=True)
175196
errors += 1
@@ -240,6 +261,7 @@ def main(argv) -> int:
240261

241262
parser.add_argument("meta_dir", help="Markup location", nargs='?', default="meta")
242263
parser.add_argument("data_dir", help="Dataset location", nargs='?', default="data")
264+
parser.add_argument("--short_line", help="Reduce huge line in review", action='store_true')
243265
parser.add_argument("--check_only", help="Check meta markup only", action='store_true')
244266
parser.add_argument("-T", help="Show TRUE markup", action="store_true")
245267
parser.add_argument("-F", help="Show FALSE markup", action="store_true")
@@ -257,7 +279,13 @@ def main(argv) -> int:
257279
_data_filter["T"] = _args.T
258280
_data_filter["F"] = _args.F
259281
_data_filter["X"] = _args.X
260-
return review(_args.meta_dir, _args.data_dir, bool(_args.check_only), _data_filter, _args.load, _args.category)
282+
return review(meta_dir=_args.meta_dir,
283+
data_dir=_args.data_dir,
284+
short_line=bool(_args.short_line),
285+
check_only=bool(_args.check_only),
286+
data_filter=_data_filter,
287+
load_json=_args.load,
288+
category=_args.category)
261289

262290

263291
if __name__ == """__main__""":

0 commit comments

Comments
 (0)