JSON Parser Pt3: Objects / tying it together
mari tang
Posted on May 17, 2019
Here's where we left off from Pt 2. We've got arrays handled, so the only datatype left to handle is objects!
/**
 * Parses a whitespace-free JSON string into the JavaScript value it encodes.
 *
 * The kind of value is decided from the first character alone; the input is
 * assumed to be well formed. Strings are returned as the text between the
 * outer quotes (escape sequences are not decoded).
 *
 * @param {string} jstring - JSON text for exactly one value.
 * @returns {*} The parsed value, or undefined when the first character is not recognised.
 */
function JSONParser(jstring){
  const first = jstring[0];
  if(first === '"') return jstring.slice(1, jstring.length-1);
  if(first === 't') return true;
  if(first === 'f') return false;
  if(first === 'u') return undefined;
  if(first === 'n') return null;
  // A number starts with a digit or, when negative, with a minus sign.
  if(first === '-' || (first >= '0' && first <= '9')) return Number(jstring);
  if(first === '[') return parseArray(jstring);
}
// Closing delimiters the scanners are still waiting for; the last entry belongs
// to the innermost string/array/object currently open.
const stack = [];

// Maps each opening delimiter to the character that closes it.
const openings = {
  '{': '}',
  '[': ']',
  '"': '"',
};
/**
 * Parses the JSON text of an array into a JavaScript array.
 *
 * The text between the outer brackets is scanned once. A stack of expected
 * closing delimiters tracks whether the scanner is inside a string or a nested
 * array/object; a comma only separates elements when that stack is empty.
 * Each element's text is handed to JSONParser.
 *
 * @param {string} jstring - Well-formed, whitespace-free JSON array text, including the outer [ and ].
 * @returns {Array} The parsed elements, in order.
 */
function parseArray(jstring){
  const output = [];
  // "[]" (or anything shorter) has nothing between the brackets.
  if(jstring.length < 3) return output;
  const valueStr = jstring.slice(1, jstring.length-1);
  // Kept local so nested calls and malformed input cannot leave stale entries behind.
  const stack = [];
  let start = 0;
  // i runs one past the last index so the final element is flushed at the end.
  for(let i = 0; i <= valueStr.length; i++){
    const ch = valueStr[i];
    const top = stack[stack.length-1];
    if(top === '\\'){
      // The previous character was a backslash: this one is escaped, so it
      // must not open or close anything.
      stack.pop();
      continue;
    }
    if(ch === '\\' && top === '"'){
      // Backslash inside a string: remember it and look at nothing else, so
      // the marker is not popped again by the closing-delimiter check below.
      stack.push('\\');
      continue;
    }
    if(top === ch){
      // Found the delimiter that closes the innermost open string/array/object.
      stack.pop();
    } else if(openings[ch] && top !== '"'){
      // Opening delimiters only count outside of strings.
      stack.push(openings[ch]);
    }
    if((!stack.length && ch === ',') || i === valueStr.length){
      output.push(JSONParser(valueStr.slice(start, i)));
      start = i+1;
    }
  }
  return output;
}
For a quick recap, we're handling primitives based on the first character in their JSON strings. If we run into an array, we need slightly more complicated logic, which we're encapsulating in its own function.
The goal of our parseArray is to correctly handle an array that may have some number of things inside of it, each of which we can handle through a recursive call on our JSONParser.
Recursively calling our JSONParser has the benefit of ensuring that we can handle any sort of datatype that we've figured out how to handle, including arrays themselves, thus allowing us to parse arbitrarily deeply nested arrays (given that we don't hit an overflow).
So, on to objects. We'll want to use a similar strategy with objects as we do arrays, but they'll be a little more complicated because they operate on key/value pairs. As such, we'll need to chunk things out based on both commas and colons. If we have an object like this: {"hello":"world","I'm":"here"}
and break it down by commas, we'll end up with two items: "hello":"world"
and "I'm":"here"
. We'll have to look for colons to further separate those two parts, such that we'll get key/value pairs of "hello"
and "world"
, "I'm"
and "here"
.
Since our code is going to use some of the same ideas as our array parser, we'll start by copying and renaming it.
// Starting point for the object parser: the parseArray body under a new name.
// At this stage it still splits on top-level commas and collects the parsed
// pieces into an array.
// jstring: JSON text including its outer delimiters (they are sliced off below).
function parseObj(jstring){
const output = [];
// Fewer than three characters means nothing sits between the outer delimiters.
if(jstring.length < 3) return output;
const valueStr = jstring.slice(1, jstring.length-1)
// Index in valueStr where the piece currently being scanned begins.
let start = 0;
// i runs one past the last index so the final piece is flushed when i === valueStr.length.
for(let i = 0; i <= valueStr.length; i++){
// PLEASE NOTE: all instances of '\\ ' should actually be '\\'
// Dev.to's syntax highlighting doesn't appropriately account for the fact that the second backslash is escaped by the first.
// NOTE(review): as written, '\\ ' is a backslash followed by a space, so the single
// character valueStr[i] can never equal it and this escape branch never runs.
if(stack[stack.length-1] === '\\ '){
stack.pop();
continue;
} else if(valueStr[i] === '\\ '){
stack.push('\\ ');
}
// Pop when the current character is the closing delimiter on top of the stack;
// otherwise push the matching closer for an opening delimiter seen outside a string.
if(stack[stack.length-1] === valueStr[i] && stack[stack.length-1] !== '"' ||
stack[stack.length-1] === valueStr[i] && valueStr[i] === '"'){
stack.pop();
} else if(openings[valueStr[i]] && stack[stack.length-1] !== '"'){
stack.push(openings[valueStr[i]]);
}
// A comma with an empty stack, or the end of the text, finishes the current piece.
if (!stack.length && valueStr[i] === ',' || i === valueStr.length) {
const curVal = JSONParser(valueStr.slice(start, i));
output.push(curVal);
start = i+1;
}
}
return output;
}
So, one of the first things we can change about our function is the data structure of its output. Rather than an array, we want to build up an object to eventually return.
// Skeleton of the object parser: results are collected in a plain object
// instead of an array. Nothing is scanned or returned yet.
function parseObj(jstring){
const output = {};
}
Most of the rest of the stuff in our parseArray
function can stick around, because we'll still be going through the process of checking if it's empty, cutting off the {
and }
, looping through our JSON string, and maintaining our stack
.
/**
 * Object parser, scanning stage: walks the text between the outer braces and
 * keeps a stack of expected closing delimiters, but does not extract any
 * key/value pairs yet.
 *
 * @param {string} jstring - Well-formed, whitespace-free JSON object text, including the outer { and }.
 * @returns {Object} An empty object at this stage.
 */
function parseObj(jstring){
  // The result is an object, not an array.
  const output = {};
  if(jstring.length < 3) return output;
  const valueStr = jstring.slice(1, jstring.length-1);
  // Kept local so nested calls and malformed input cannot leave stale entries behind.
  const stack = [];
  // Start index of the piece being scanned; not read until key/value extraction is added.
  let start = 0;
  for(let i = 0; i <= valueStr.length; i++){
    const ch = valueStr[i];
    const top = stack[stack.length-1];
    if(top === '\\'){
      // The previous character was a backslash: this one is escaped and is skipped.
      stack.pop();
      continue;
    }
    if(ch === '\\' && top === '"'){
      // Backslash inside a string: remember it, and skip the closing check
      // below so the marker is not popped straight away.
      stack.push('\\');
      continue;
    }
    if(top === ch){
      // Closing delimiter of the innermost open string/array/object.
      stack.pop();
    } else if(openings[ch] && top !== '"'){
      // Opening delimiters only count outside of strings.
      stack.push(openings[ch]);
    }
  }
  return output;
}
So, at this point, we're able to iterate over the contents of our object, maintaining a stack that will give us an indication of how deeply we're nested / whether we should look at a bracket as indicating the start of a new piece of data, or if it's just part of a string. (["[]"]
is different than [[]]
, or ["[","]"]
).
The final task, now, is to grab the key/value pairs from our string and store them in our object. We'll initialize two variables, key
, and val
to store them.
function parseObj(jstring){
const output = [];
if(jstring.length < 3) return output;
const valueStr = jstring.slice(1, jstring.length-1)
let start = 0;
let key;
let val;
//... and so on
How will we go about populating key
and val
? We know that both keys and values are javascript values, so we can resolve their values with a recursive call to JSONParser
, so long as we know which part of the JSON string corresponds to key
, and which to value
.
Let's look at an example object:
{"key1":"val1","key2":"val2"}
It's pretty clear that the first thing we hit will be a key, and that the key ends at the first :
. After the first :
, we have the first value, which ends at the ,
. After that, we have a key again, then a value that terminates with the end of the object.
So, a key will start either at the front of the object, or directly after a ,
, and a value will start after a :
, and end either at a ,
or the end of the object.
Knowing this, we can finally populate our keys and values, then assign them within our object.
/**
 * Object parser, extraction stage: splits the text between the outer braces
 * into keys (ending at ':') and values (ending at ',' or the end of the text)
 * and stores each pair on the result object. Keys and values are parsed with
 * JSONParser.
 *
 * At this stage every ':' and ',' is treated as a separator regardless of the
 * stack, so values that themselves contain those characters are not handled.
 *
 * @param {string} jstring - Well-formed, whitespace-free JSON object text, including the outer { and }.
 * @returns {Object} The parsed key/value pairs.
 */
function parseObj(jstring){
  // The result is an object, not an array.
  const output = {};
  if(jstring.length < 3) return output;
  const valueStr = jstring.slice(1, jstring.length-1);
  // Kept local so nested calls and malformed input cannot leave stale entries behind.
  const stack = [];
  let start = 0;
  let key;
  let val;
  // i runs one past the last index so the final value is stored at the end.
  for(let i = 0; i <= valueStr.length; i++){
    const ch = valueStr[i];
    const top = stack[stack.length-1];
    if(top === '\\'){
      // The previous character was a backslash: this one is escaped and is skipped.
      stack.pop();
      continue;
    }
    if(ch === '\\' && top === '"'){
      // Backslash inside a string: remember it, and skip the closing check
      // below so the marker is not popped straight away.
      stack.push('\\');
      continue;
    }
    if(top === ch){
      // Closing delimiter of the innermost open string/array/object.
      stack.pop();
    } else if(openings[ch] && top !== '"'){
      // Opening delimiters only count outside of strings.
      stack.push(openings[ch]);
    }
    if(ch === ':'){
      // Everything since `start` is the key.
      key = JSONParser(valueStr.slice(start, i));
      start = i+1;
    }
    if(ch === ',' || i === valueStr.length){
      // Everything since `start` is the value belonging to the current key.
      val = JSONParser(valueStr.slice(start, i));
      start = i+1;
      output[key] = val;
    }
  }
  return output;
}
one more thing!
The final, final part of this is that we should only be adding key/value pairs to the object if our stack is clear. Otherwise, we'll run into problems with situations like these: {"hi":"{"}
, or {"one":{"two":"{"},"three":{"three":"}"}}
So, finally, we'll just check if our stack is clear before we store our key
or val
.
/**
 * Parses the JSON text of an object into a plain JavaScript object.
 *
 * The text between the outer braces is scanned once. A stack of expected
 * closing delimiters tracks whether the scanner is inside a string or a nested
 * array/object; ':' and ',' only act as separators when that stack is empty.
 * Keys and values are parsed with JSONParser.
 *
 * @param {string} jstring - Well-formed, whitespace-free JSON object text, including the outer { and }.
 * @returns {Object} The parsed key/value pairs.
 */
function parseObj(jstring){
  // The result is an object, not an array.
  const output = {};
  if(jstring.length < 3) return output;
  const valueStr = jstring.slice(1, jstring.length-1);
  // Kept local so nested calls and malformed input cannot leave stale entries behind.
  const stack = [];
  let start = 0;
  let key;
  let val;
  // i runs one past the last index so the final value is stored at the end.
  for(let i = 0; i <= valueStr.length; i++){
    const ch = valueStr[i];
    const top = stack[stack.length-1];
    if(top === '\\'){
      // The previous character was a backslash: this one is escaped and is skipped.
      stack.pop();
      continue;
    }
    if(ch === '\\' && top === '"'){
      // Backslash inside a string: remember it, and skip the closing check
      // below so the marker is not popped straight away.
      stack.push('\\');
      continue;
    }
    if(top === ch){
      // Closing delimiter of the innermost open string/array/object.
      stack.pop();
    } else if(openings[ch] && top !== '"'){
      // Opening delimiters only count outside of strings.
      stack.push(openings[ch]);
    }
    // Separators inside a string or nested structure belong to that value.
    if(stack.length) continue;
    if(ch === ':'){
      key = JSONParser(valueStr.slice(start, i));
      start = i+1;
    } else if(ch === ',' || i === valueStr.length){
      val = JSONParser(valueStr.slice(start, i));
      start = i+1;
      output[key] = val;
    }
  }
  return output;
}
With this, we just have to hook this function into our JSONParser
function when we see a {
and we'll be all set!
/**
 * Parses a whitespace-free JSON string into the JavaScript value it encodes.
 *
 * The kind of value is decided from the first character alone; the input is
 * assumed to be well formed. Arrays are delegated to parseArray and objects to
 * parseObj. Strings are returned as the text between the outer quotes (escape
 * sequences are not decoded).
 *
 * @param {string} jstring - JSON text for exactly one value.
 * @returns {*} The parsed value, or undefined when the first character is not recognised.
 */
function JSONParser(jstring){
  const first = jstring[0];
  if(first === '"') return jstring.slice(1, jstring.length-1);
  if(first === 't') return true;
  if(first === 'f') return false;
  if(first === 'u') return undefined;
  if(first === 'n') return null;
  // A number starts with a digit or, when negative, with a minus sign.
  if(first === '-' || (first >= '0' && first <= '9')) return Number(jstring);
  if(first === '[') return parseArray(jstring);
  // Objects go to the object parser, not the array parser.
  if(first === '{') return parseObj(jstring);
}
// Closing delimiters the scanners are still waiting for; the last entry belongs
// to the innermost string/array/object currently open.
const stack = [];

// Maps each opening delimiter to the character that closes it.
const openings = {
  '{': '}',
  '[': ']',
  '"': '"',
};
/**
 * Parses the JSON text of an array into a JavaScript array.
 *
 * The text between the outer brackets is scanned once. A stack of expected
 * closing delimiters tracks whether the scanner is inside a string or a nested
 * array/object; a comma only separates elements when that stack is empty.
 * Each element's text is handed to JSONParser.
 *
 * @param {string} jstring - Well-formed, whitespace-free JSON array text, including the outer [ and ].
 * @returns {Array} The parsed elements, in order.
 */
function parseArray(jstring){
  const output = [];
  // "[]" (or anything shorter) has nothing between the brackets.
  if(jstring.length < 3) return output;
  const valueStr = jstring.slice(1, jstring.length-1);
  // Kept local so nested calls and malformed input cannot leave stale entries behind.
  const stack = [];
  let start = 0;
  // i runs one past the last index so the final element is flushed at the end.
  for(let i = 0; i <= valueStr.length; i++){
    const ch = valueStr[i];
    const top = stack[stack.length-1];
    if(top === '\\'){
      // The previous character was a backslash: this one is escaped, so it
      // must not open or close anything.
      stack.pop();
      continue;
    }
    if(ch === '\\' && top === '"'){
      // Backslash inside a string: remember it and look at nothing else, so
      // the marker is not popped again by the closing-delimiter check below.
      stack.push('\\');
      continue;
    }
    if(top === ch){
      // Found the delimiter that closes the innermost open string/array/object.
      stack.pop();
    } else if(openings[ch] && top !== '"'){
      // Opening delimiters only count outside of strings.
      stack.push(openings[ch]);
    }
    if((!stack.length && ch === ',') || i === valueStr.length){
      output.push(JSONParser(valueStr.slice(start, i)));
      start = i+1;
    }
  }
  return output;
}
/**
 * Parses the JSON text of an object into a plain JavaScript object.
 *
 * The text between the outer braces is scanned once. A stack of expected
 * closing delimiters tracks whether the scanner is inside a string or a nested
 * array/object; ':' and ',' only act as separators when that stack is empty.
 * Keys and values are parsed with JSONParser.
 *
 * @param {string} jstring - Well-formed, whitespace-free JSON object text, including the outer { and }.
 * @returns {Object} The parsed key/value pairs.
 */
function parseObj(jstring){
  // The result is an object, not an array.
  const output = {};
  if(jstring.length < 3) return output;
  const valueStr = jstring.slice(1, jstring.length-1);
  // Kept local so nested calls and malformed input cannot leave stale entries behind.
  const stack = [];
  let start = 0;
  let key;
  let val;
  // i runs one past the last index so the final value is stored at the end.
  for(let i = 0; i <= valueStr.length; i++){
    const ch = valueStr[i];
    const top = stack[stack.length-1];
    if(top === '\\'){
      // The previous character was a backslash: this one is escaped and is skipped.
      stack.pop();
      continue;
    }
    if(ch === '\\' && top === '"'){
      // Backslash inside a string: remember it, and skip the closing check
      // below so the marker is not popped straight away.
      stack.push('\\');
      continue;
    }
    if(top === ch){
      // Closing delimiter of the innermost open string/array/object.
      stack.pop();
    } else if(openings[ch] && top !== '"'){
      // Opening delimiters only count outside of strings.
      stack.push(openings[ch]);
    }
    // Separators inside a string or nested structure belong to that value.
    if(stack.length) continue;
    if(ch === ':'){
      key = JSONParser(valueStr.slice(start, i));
      start = i+1;
    } else if(ch === ',' || i === valueStr.length){
      val = JSONParser(valueStr.slice(start, i));
      start = i+1;
      output[key] = val;
    }
  }
  return output;
}
Notes and Conclusions
- We're assuming we get a properly formatted JSON string in
- We're also assuming that there's no extraneous whitespace, since
JSON.stringify
doesn't include extra whitespace or newlines by default
Other than that, as long as the JSON coming in was formed by JSON.stringify
, this should handle everything!
Posted on May 17, 2019
Join Our Newsletter. No Spam, Only the good stuff.
Sign up to receive the latest update from our blog.