 EXIT_SUCCESS = 0
 EXIT_FAILURE = 1
 
+HUNK_SIZE = 80
+
 
 @functools.cache
 def get_excluding_extensions() -> set[str]:
@@ -39,7 +41,14 @@ def read_cache(path) -> list[str]:
         return f.read().replace("\r\n", '\n').replace('\r', '\n').split('\n')
 
 
-def read_data(path, line_start, line_end, value_start, value_end, ground_truth, creds: List[MetaCred]):
+def read_data(path: str,
+              line_start: int,
+              line_end: int,
+              value_start: int,
+              value_end: int,
+              ground_truth: str,
+              short_line: bool,
+              creds: List[MetaCred]):
     lines = read_cache(path)
     if line_start == line_end:
         data_line = lines[line_start - 1]
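# --- illustrative note, not part of the diff ---
# read_cache() above normalizes CRLF and bare CR to LF before splitting, so
# markup line numbers stay stable regardless of the platform that produced the
# data file. A minimal, self-contained sketch of that normalization:
raw = "first\r\nsecond\rthird\n"
lines = raw.replace("\r\n", '\n').replace('\r', '\n').split('\n')
assert lines == ["first", "second", "third", ""]
# --- end of note ---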
@@ -62,6 +71,7 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
     line_found_in_cred = False
     correct_value_position = False
     if creds:
+        # only when reviewing against a CredSweeper report
         for cred in creds:
             if cred.path == path:
                 if line_start == cred.line_start and line_end == cred.line_start:
@@ -93,19 +103,26 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
                     line_found_in_cred = True
                     correct_value_position = True
 
-    if 0 <= value_start and 0 <= value_end:
-        line = data_line[:value_start] \
+    text_start = value_start - HUNK_SIZE \
+        if short_line and 0 < value_start - HUNK_SIZE else 0
+    text_end = multiline_end_offset + value_end + HUNK_SIZE \
+        if short_line and 0 <= value_end and len(data_line) > multiline_end_offset + value_end + HUNK_SIZE \
+        else len(data_line)
+    if 0 <= value_start <= value_end or line_start < line_end:
+        line = data_line[text_start:value_start] \
             + Back.LIGHTYELLOW_EX \
             + data_line[value_start:value_end + multiline_end_offset] \
             + Style.RESET_ALL \
             + fore_style \
-            + data_line[value_end + multiline_end_offset:]
-    elif value_start >= 0 > value_end:
-        line = data_line[:value_start] \
+            + data_line[value_end + multiline_end_offset:text_end]
+    elif value_end < 0 <= value_start:
+        line = data_line[text_start:value_start] \
             + Style.BRIGHT \
             + data_line[value_start:]
     else:
-        line = data_line
+        line = data_line[:HUNK_SIZE]
+    if line_start < line_end:
+        line = line.replace('\n', Style.RESET_ALL + '\n' + fore_style + Back.LIGHTYELLOW_EX)
     print(f"{line_start}:{Style.RESET_ALL}{fore_style}{line}{Style.RESET_ALL}", flush=True)
     if not correct_value_position:
         print("Possible wrong value markup", flush=True)
@@ -115,7 +132,7 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
     test_line = data_line.lower()
     if not any(
             x in test_line for x in
-            ["api", "pass", "secret", "pw", "key", "credential", "token", "auth", "nonce", "salt", "cert"]
+            ["api", "pass", "secret", "pw", "key", "credential", "token", "auth", "nonce", "salt"]
     ):
         repo_id = path.split('/')[1]
         subprocess.check_call(
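# --- illustrative sketch, not part of the diff ---
# The new HUNK_SIZE logic keeps at most 80 characters of context on each side
# of the highlighted value when --short_line is set. The helper below is a
# hypothetical, single-line approximation (multiline_end_offset assumed 0) that
# marks the value with brackets instead of colorama styling for clarity.
HUNK_SIZE = 80


def shorten_around_value(data_line: str, value_start: int, value_end: int) -> str:
    text_start = value_start - HUNK_SIZE if 0 < value_start - HUNK_SIZE else 0
    text_end = value_end + HUNK_SIZE if len(data_line) > value_end + HUNK_SIZE else len(data_line)
    # the value itself is always printed in full; only the surrounding context is trimmed
    return data_line[text_start:value_start] + '[' + data_line[value_start:value_end] + ']' + data_line[value_end:text_end]


# e.g. a 1000-character line with the value at columns 500..520 is reduced to
# 80 characters of context, the bracketed 20-character value, and 80 more characters
print(len(shorten_around_value('x' * 1000, 500, 520)))  # 182, including the two brackets
# --- end of sketch ---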
@@ -128,10 +145,12 @@ def read_data(path, line_start, line_end, value_start, value_end, ground_truth,
 
 def review(meta_dir: str,
            data_dir: str,
+           short_line: bool,
            check_only: bool,
            data_filter: dict,
+           category: Optional[str] = None,
            load_json: Optional[str] = None,
-           category: Optional[str] = None) -> int:
+           ) -> int:
     errors = 0
     duplicates = 0
     if not os.path.exists(meta_dir):
@@ -163,13 +182,15 @@ def review(meta_dir: str,
         if not check_only:
             print(str(row), flush=True)
         try:
-            read_data(row.FilePath,
-                      row.LineStart,
-                      row.LineEnd,
-                      row.ValueStart,
-                      row.ValueEnd,
-                      row.GroundTruth,
-                      creds)
+            read_data(path=row.FilePath,
+                      line_start=row.LineStart,
+                      line_end=row.LineEnd,
+                      value_start=row.ValueStart,
+                      value_end=row.ValueEnd,
+                      ground_truth=row.GroundTruth,
+                      short_line=short_line,
+                      creds=creds,
+                      )
         except Exception as exc:
             print(f"Failure {row}", exc, flush=True)
             errors += 1
@@ -240,6 +261,7 @@ def main(argv) -> int:
 
     parser.add_argument("meta_dir", help="Markup location", nargs='?', default="meta")
     parser.add_argument("data_dir", help="Dataset location", nargs='?', default="data")
+    parser.add_argument("--short_line", help="Shorten long lines in review output", action='store_true')
     parser.add_argument("--check_only", help="Check meta markup only", action='store_true')
     parser.add_argument("-T", help="Show TRUE markup", action="store_true")
     parser.add_argument("-F", help="Show FALSE markup", action="store_true")
@@ -257,7 +279,13 @@ def main(argv) -> int:
     _data_filter["T"] = _args.T
     _data_filter["F"] = _args.F
     _data_filter["X"] = _args.X
-    return review(_args.meta_dir, _args.data_dir, bool(_args.check_only), _data_filter, _args.load, _args.category)
+    return review(meta_dir=_args.meta_dir,
+                  data_dir=_args.data_dir,
+                  short_line=bool(_args.short_line),
+                  check_only=bool(_args.check_only),
+                  data_filter=_data_filter,
+                  load_json=_args.load,
+                  category=_args.category)
 
 
 if __name__ == """__main__""":
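# --- illustrative usage, not part of the diff ---
# Assuming the script file is named review_data.py (the name is not shown in
# the diff), a review with long lines shortened around the highlighted value
# could be started as:
#     python review_data.py meta data --short_line -T
# Programmatically, the same run maps to the new keyword-argument call:
import review_data  # hypothetical import of the script as a module

rc = review_data.review(meta_dir="meta",
                        data_dir="data",
                        short_line=True,
                        check_only=False,
                        data_filter={"T": True, "F": False, "X": False},
                        load_json=None,
                        category=None)
# --- end of usage ---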