|
1 | 1 | #!/usr/bin/awk -f |
2 | 2 | # |
3 | 3 | # Software: JSON.awk - a practical JSON parser written in awk |
4 | | -# Version: 1.4.1 |
| 4 | +# Version: 1.4.2 |
5 | 5 | # Copyright (c) 2013-2020, step |
6 | 6 | # License: MIT or Apache 2 |
7 | 7 | # Project home: https://github.com/step-/JSON.awk |
@@ -338,21 +338,21 @@ function tokenize(a1) { #{{{1 |
338 | 338 | # - reduce [:cntrl:] to [\000-\037]; https://github.com/step-/JSON.awk/issues/5 |
339 | 339 | # - reduce [:space:] to [ \t\n\r]; https://tools.ietf.org/html/rfc8259#page-5 ws |
340 | 340 | # - replace {4} quantifier with four repeated [0-9a-fA-F] for mawk; https://unix.stackexchange.com/a/506125 |
341 | | -# - BOM encodings UTF-8, UTF16-LE and UTF-BE; https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding |
| 341 | +# - UTF-8 BOM signature; https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding |
342 | 342 | # ---------- |
343 | 343 | # TOKENS = BOM "|" STRING "|" NUMBER "|" KEYWORD "|" SPACE "|." |
344 | | -# BOM = "^\357\273\277|^\377\376|^\376\377" |
| 344 | +# BOM = "^\357\273\277" # cf. issue #17 |
345 | 345 | # STRING = "\"" CHAR "*(" ESCAPE CHAR "*)*\"" |
346 | 346 | # ESCAPE = "(\\[^u[:cntrl:]]|\\u[0-9a-fA-F]{4})" |
347 | 347 | # CHAR = "[^[:cntrl:]\\\"]" |
348 | 348 | # NUMBER = "-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?" |
349 | 349 | # KEYWORD = "null|false|true" |
350 | 350 | # SPACE = "[[:space:]]+" |
351 | 351 |
|
352 | | - gsub(/^\357\273\277|^\377\376|^\376\377|"[^"\\\000-\037]*((\\[^u\000-\037]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])[^"\\\000-\037]*)*"|-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?|null|false|true|[ \t\n\r]+|./, "\n&", a1) |
| 352 | + gsub(/^\357\273\277|"[^"\\\000-\037]*((\\[^u\000-\037]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])[^"\\\000-\037]*)*"|-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?|null|false|true|[ \t\n\r]+|./, "\n&", a1) |
353 | 353 | gsub("\n" "[ \t\n\r]+", "\n", a1) |
354 | 354 | # strip the leading "\n" and the BOM token that follows it, if any (pattern: ^\n BOM?) |
355 | | - sub(/^\n((\357\273\277|\377\376|\376\377)\n)?/, "", a1) |
| 355 | + sub(/^\n(\357\273\277\n)?/, "", a1) |
356 | 356 | ITOKENS=0 # get_token() helper |
357 | 357 | return NTOKENS = split(a1, TOKENS, /\n/) |
358 | 358 | } |
|
0 commit comments