| --[[ |
| Licensed according to the included 'LICENSE' document |
| Author: Thomas Harning Jr <harningt@gmail.com> |
| ]] |
| local lpeg = require("lpeg") |
| local util = require("json.decode.util") |
| local merge = require("json.util").merge |
| |
| local tonumber = tonumber |
| local string_char = require("string").char |
| local floor = require("math").floor |
| local table_concat = require("table").concat |
| |
| local error = error |
| module("json.decode.strings") |
| local function get_error(item) |
| local fmt_string = item .. " in string [%q] @ %i:%i" |
| return function(data, index) |
| local line, line_index, bad_char, last_line = util.get_invalid_character_info(data, index) |
| local err = fmt_string:format(bad_char, line, line_index) |
| error(err) |
| end |
| end |
| |
| local bad_unicode = get_error("Illegal unicode escape") |
| local bad_hex = get_error("Illegal hex escape") |
| local bad_character = get_error("Illegal character") |
| local bad_escape = get_error("Illegal escape") |
| |
| local knownReplacements = { |
| ["'"] = "'", |
| ['"'] = '"', |
| ['\\'] = '\\', |
| ['/'] = '/', |
| b = '\b', |
| f = '\f', |
| n = '\n', |
| r = '\r', |
| t = '\t', |
| v = '\v', |
| z = '\z' |
| } |
| |
| -- according to the table at http://da.wikipedia.org/wiki/UTF-8 |
| local function utf8DecodeUnicode(code1, code2) |
| code1, code2 = tonumber(code1, 16), tonumber(code2, 16) |
| if code1 == 0 and code2 < 0x80 then |
| return string_char(code2) |
| end |
| if code1 < 0x08 then |
| return string_char( |
| 0xC0 + code1 * 4 + floor(code2 / 64), |
| 0x80 + code2 % 64) |
| end |
| return string_char( |
| 0xE0 + floor(code1 / 16), |
| 0x80 + (code1 % 16) * 4 + floor(code2 / 64), |
| 0x80 + code2 % 64) |
| end |
| |
| local function decodeX(code) |
| code = tonumber(code, 16) |
| return string_char(code) |
| end |
| |
| local doSimpleSub = lpeg.C(lpeg.S("'\"\\/bfnrtvz")) / knownReplacements |
| local doUniSub = lpeg.P('u') * (lpeg.C(util.hexpair) * lpeg.C(util.hexpair) + lpeg.P(bad_unicode)) |
| local doXSub = lpeg.P('x') * (lpeg.C(util.hexpair) + lpeg.P(bad_hex)) |
| |
| local defaultOptions = { |
| badChars = '', |
| additionalEscapes = false, -- disallow untranslated escapes |
| escapeCheck = #lpeg.S('bfnrtv/\\"xu\'z'), -- no check on valid characters |
| decodeUnicode = utf8DecodeUnicode, |
| strict_quotes = false |
| } |
| |
| default = nil -- Let the buildCapture optimization take place |
| |
| strict = { |
| badChars = '\b\f\n\r\t\v', |
| additionalEscapes = false, -- no additional escapes |
| escapeCheck = #lpeg.S('bfnrtv/\\"u'), --only these chars are allowed to be escaped |
| strict_quotes = true |
| } |
| |
| local function buildCaptureString(quote, badChars, escapeMatch) |
| local captureChar = (1 - lpeg.S("\\" .. badChars .. quote)) + (lpeg.P("\\") / "" * escapeMatch) |
| captureChar = captureChar + (-#lpeg.P(quote) * lpeg.P(bad_character)) |
| local captureString = captureChar^0 |
| return lpeg.P(quote) * lpeg.Cs(captureString) * lpeg.P(quote) |
| end |
| |
| local function buildCapture(options) |
| options = options and merge({}, defaultOptions, options) or defaultOptions |
| local quotes = { '"' } |
| if not options.strict_quotes then |
| quotes[#quotes + 1] = "'" |
| end |
| local escapeMatch = doSimpleSub |
| escapeMatch = escapeMatch + doXSub / decodeX |
| escapeMatch = escapeMatch + doUniSub / options.decodeUnicode |
| if options.additionalEscapes then |
| escapeMatch = escapeMatch + options.additionalEscapes |
| end |
| if options.escapeCheck then |
| escapeMatch = options.escapeCheck * escapeMatch + lpeg.P(bad_escape) |
| end |
| local captureString |
| for i = 1, #quotes do |
| local cap = buildCaptureString(quotes[i], options.badChars, escapeMatch) |
| if captureString == nil then |
| captureString = cap |
| else |
| captureString = captureString + cap |
| end |
| end |
| return captureString |
| end |
| |
| function register_types() |
| util.register_type("STRING") |
| end |
| |
| function load_types(options, global_options, grammar) |
| local capture = buildCapture(options) |
| local string_id = util.types.STRING |
| grammar[string_id] = capture |
| util.append_grammar_item(grammar, "VALUE", lpeg.V(string_id)) |
| end |