昨天跟友人一起大块欢饮之后,问了一个比较好玩的问题? JSON.parse 是怎么实现?当时草草的中规中矩的回答了一番,但终究对自己无知不是很满意
今天上午想起来之后,便迅速翻出node的源码来一探究竟!
对js的解析无疑是 v8的专属,但由于v8是由c++编写,我对c++的各种语法了解比较一般,如果有描述错误的地方,还请各位大神斧正.
v8的git地址 https://chromium.googlesource.com/v8/v8.git 需要FQ
node的官网 https://nodejs.org , 如果速度慢可以直接访问 http://nodejs.cn/
我是在node源码的deps找到的v8的相关代码
在v8的src文件夹中 直接全局搜索 JSON::Parse ( c++的语法 ) 可以找到对应代码编写在 api.cc文件中,具体代码如下:
在刚刚的搜索结果中,找到一个简单的json parse,我们就从这里入手
这个例子里面关键的代码不多,主要是
ASSIGN_RETURN_ON_EXCEPTION(isolate, result,
JsonParser(isolate, source).ParseJson(), Object);
对string做处理的是 JSONParse对象的ParseJson函数,然后继续全局搜索 JSONParse,在 json-parser.cc中找到了ParseJson的相关代码
template <bool seq_one_byte>
MaybeHandle<Object> JsonParser<seq_one_byte>::ParseJson() {
// Advance to the first character (possibly EOS)
AdvanceSkipWhitespace();
Handle<Object> result = ParseJsonValue();
if (result.is_null() || c0_ != kEndOfString) {
// Some exception (for example stack overflow) is already pending.
if (isolate_->has_pending_exception()) return Handle<Object>::null();
// Parse failed. Current character is the unexpected token.
Factory* factory = this->factory();
MessageTemplate::Template message;
Handle<Object> arg1 = Handle<Smi>(Smi::FromInt(position_), isolate());
Handle<Object> arg2;
switch (c0_) {
case kEndOfString:
message = MessageTemplate::kJsonParseUnexpectedEOS;
break;
case '-':
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
message = MessageTemplate::kJsonParseUnexpectedTokenNumber;
break;
case '"':
message = MessageTemplate::kJsonParseUnexpectedTokenString;
break;
default:
message = MessageTemplate::kJsonParseUnexpectedToken;
arg2 = arg1;
arg1 = factory->LookupSingleCharacterStringFromCode(c0_);
break;
}
Handle<Script> script(factory->NewScript(source_));
// We should sent compile error event because we compile JSON object in
// separated source file.
isolate()->debug()->OnCompileError(script);
MessageLocation location(script, position_, position_ + 1);
Handle<Object> error = factory->NewSyntaxError(message, arg1, arg2);
return isolate()->template Throw<Object>(error, &location);
}
return result;
}
ParseJson
很明显,这个函数的代码中最关键的就是ParseJsonValue这个函数
// Parse any JSON value.
template <bool seq_one_byte>
Handle<Object> JsonParser<seq_one_byte>::ParseJsonValue() {
StackLimitCheck stack_check(isolate_);
if (stack_check.HasOverflowed()) {
isolate_->StackOverflow();
return Handle<Object>::null();
}
if (stack_check.InterruptRequested() &&
isolate_->stack_guard()->HandleInterrupts()->IsException(isolate_)) {
return Handle<Object>::null();
}
if (c0_ == '"') return ParseJsonString();
if ((c0_ >= '0' && c0_ <= '9') || c0_ == '-') return ParseJsonNumber();
if (c0_ == '{') return ParseJsonObject();
if (c0_ == '[') return ParseJsonArray();
if (c0_ == 'f') {
if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&
AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {
AdvanceSkipWhitespace();
return factory()->false_value();
}
return ReportUnexpectedCharacter();
}
if (c0_ == 't') {
if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&
AdvanceGetChar() == 'e') {
AdvanceSkipWhitespace();
return factory()->true_value();
}
return ReportUnexpectedCharacter();
}
if (c0_ == 'n') {
if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&
AdvanceGetChar() == 'l') {
AdvanceSkipWhitespace();
return factory()->null_value();
}
return ReportUnexpectedCharacter();
}
return ReportUnexpectedCharacter();
}
ParseJsonValue
string在解析之前,进行了一次字符串格式的整理,这样可以保证c0_ 是string的第一个有效字符,而不是空格之类的无用字符.
接下来就比较简单了,根据不同的第一个字符进行不同的处理,其他都会报未期待的字符错误
字符 | 调用函数 |
" | ParseJsonString |
数字或 - | ParseJsonNumber |
{ | ParseJsonObject |
[ | ParseJsonArray |
f | 判断是否是false |
t | 判断是否是true |
n | 判断是否是null |
ParseJsonString 继续验证string的有效性,因为本身就string类型,所以不需要类型转换
template <bool seq_one_byte>
bool JsonParser<seq_one_byte>::ParseJsonString(Handle<String> expected) {
int length = expected->length();
if (source_->length() - position_ - 1 > length) {
DisallowHeapAllocation no_gc;
String::FlatContent content = expected->GetFlatContent();
if (content.IsOneByte()) {
DCHECK_EQ('"', c0_);
const uint8_t* input_chars = seq_source_->GetChars() + position_ + 1;
const uint8_t* expected_chars = content.ToOneByteVector().start();
for (int i = 0; i < length; i++) {
uint8_t c0 = input_chars[i];
if (c0 != expected_chars[i] || c0 == '"' || c0 < 0x20 || c0 == '\') {
return false;
}
}
if (input_chars[length] == '"') {
position_ = position_ + length + 1;
AdvanceSkipWhitespace();
return true;
}
}
}
return false;
}
ParseJsonString
ParseJsonNumber 验证有效性后,利用 StringToDouble 把string转化为数字
template <bool seq_one_byte>
Handle<Object> JsonParser<seq_one_byte>::ParseJsonNumber() {
bool negative = false;
int beg_pos = position_;
if (c0_ == '-') {
Advance();
negative = true;
}
if (c0_ == '0') {
Advance();
// Prefix zero is only allowed if it's the only digit before
// a decimal point or exponent.
if (IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
} else {
int i = 0;
int digits = 0;
if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();
do {
i = i * 10 + c0_ - '0';
digits++;
Advance();
} while (IsDecimalDigit(c0_));
if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
SkipWhitespace();
return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate());
}
}
if (c0_ == '.') {
Advance();
if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
do {
Advance();
} while (IsDecimalDigit(c0_));
}
if (AsciiAlphaToLower(c0_) == 'e') {
Advance();
if (c0_ == '-' || c0_ == '+') Advance();
if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter();
do {
Advance();
} while (IsDecimalDigit(c0_));
}
int length = position_ - beg_pos;
double number;
if (seq_one_byte) {
Vector<const uint8_t> chars(seq_source_->GetChars() + beg_pos, length);
number = StringToDouble(isolate()->unicode_cache(), chars,
NO_FLAGS, // Hex, octal or trailing junk.
std::numeric_limits<double>::quiet_NaN());
} else {
Vector<uint8_t> buffer = Vector<uint8_t>::New(length);
String::WriteToFlat(*source_, buffer.start(), beg_pos, position_);
Vector<const uint8_t> result =
Vector<const uint8_t>(buffer.start(), length);
number = StringToDouble(isolate()->unicode_cache(), result,
NO_FLAGS, // Hex, octal or trailing junk.
0.0);
buffer.Dispose();
}
SkipWhitespace();
return factory()->NewNumber(number, pretenure_);
}
ParseJsonNumber
ParseJsonObject 先建立了一个空对象,对整个字符串开始循环检查语法匹配,并将对应的key,value全部找到后添加到之前的空对象中
// Parse a JSON object. Position must be right at '{'.
template <bool seq_one_byte>
Handle<Object> JsonParser<seq_one_byte>::ParseJsonObject() {
HandleScope scope(isolate());
Handle<JSObject> json_object =
factory()->NewJSObject(object_constructor(), pretenure_);
Handle<Map> map(json_object->map());
int descriptor = 0;
ZoneList<Handle<Object> > properties(8, zone());
DCHECK_EQ(c0_, '{');
bool transitioning = true;
AdvanceSkipWhitespace();
if (c0_ != '}') {
do {
if (c0_ != '"') return ReportUnexpectedCharacter();
int start_position = position_;
Advance();
if (IsDecimalDigit(c0_)) {
ParseElementResult element_result = ParseElement(json_object);
if (element_result == kNullHandle) return Handle<Object>::null();
if (element_result == kElementFound) continue;
}
// Not an index, fallback to the slow path.
position_ = start_position;
#ifdef DEBUG
c0_ = '"';
#endif
Handle<String> key;
Handle<Object> value;
// Try to follow existing transitions as long as possible. Once we stop
// transitioning, no transition can be found anymore.
DCHECK(transitioning);
// First check whether there is a single expected transition. If so, try
// to parse it first.
bool follow_expected = false;
Handle<Map> target;
if (seq_one_byte) {
key = TransitionArray::ExpectedTransitionKey(map);
follow_expected = !key.is_null() && ParseJsonString(key);
}
// If the expected transition hits, follow it.
if (follow_expected) {
target = TransitionArray::ExpectedTransitionTarget(map);
} else {
// If the expected transition failed, parse an internalized string and
// try to find a matching transition.
key = ParseJsonInternalizedString();
if (key.is_null()) return ReportUnexpectedCharacter();
target = TransitionArray::FindTransitionToField(map, key);
// If a transition was found, follow it and continue.
transitioning = !target.is_null();
}
if (c0_ != ':') return ReportUnexpectedCharacter();
AdvanceSkipWhitespace();
value = ParseJsonValue();
if (value.is_null()) return ReportUnexpectedCharacter();
if (transitioning) {
PropertyDetails details =
target->instance_descriptors()->GetDetails(descriptor);
Representation expected_representation = details.representation();
if (value->FitsRepresentation(expected_representation)) {
if (expected_representation.IsHeapObject() &&
!target->instance_descriptors()
->GetFieldType(descriptor)
->NowContains(value)) {
Handle<FieldType> value_type(
value->OptimalType(isolate(), expected_representation));
Map::GeneralizeField(target, descriptor, details.constness(),
expected_representation, value_type);
}
DCHECK(target->instance_descriptors()
->GetFieldType(descriptor)
->NowContains(value));
properties.Add(value, zone());
map = target;
descriptor++;
continue;
} else {
transitioning = false;
}
}
DCHECK(!transitioning);
// Commit the intermediate state to the object and stop transitioning.
CommitStateToJsonObject(json_object, map, &properties);
JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key, value)
.Check();
} while (transitioning && MatchSkipWhiteSpace(','));
// If we transitioned until the very end, transition the map now.
if (transitioning) {
CommitStateToJsonObject(json_object, map, &properties);
} else {
while (MatchSkipWhiteSpace(',')) {
HandleScope local_scope(isolate());
if (c0_ != '"') return ReportUnexpectedCharacter();
int start_position = position_;
Advance();
if (IsDecimalDigit(c0_)) {
ParseElementResult element_result = ParseElement(json_object);
if (element_result == kNullHandle) return Handle<Object>::null();
if (element_result == kElementFound) continue;
}
// Not an index, fallback to the slow path.
position_ = start_position;
#ifdef DEBUG
c0_ = '"';
#endif
Handle<String> key;
Handle<Object> value;
key = ParseJsonInternalizedString();
if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter();
AdvanceSkipWhitespace();
value = ParseJsonValue();
if (value.is_null()) return ReportUnexpectedCharacter();
JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key,
value)
.Check();
}
}
if (c0_ != '}') {
return ReportUnexpectedCharacter();
}
}
AdvanceSkipWhitespace();
return scope.CloseAndEscape(json_object);
}
ParseJsonObject
ParseJsonArray 大致上跟ParseJsonObject比较相同,
// Parse a JSON array. Position must be right at '['.
template <bool seq_one_byte>
Handle<Object> JsonParser<seq_one_byte>::ParseJsonArray() {
HandleScope scope(isolate());
ZoneList<Handle<Object> > elements(4, zone());
DCHECK_EQ(c0_, '[');
ElementKindLattice lattice;
AdvanceSkipWhitespace();
if (c0_ != ']') {
do {
Handle<Object> element = ParseJsonValue();
if (element.is_null()) return ReportUnexpectedCharacter();
elements.Add(element, zone());
lattice.Update(element);
} while (MatchSkipWhiteSpace(','));
if (c0_ != ']') {
return ReportUnexpectedCharacter();
}
}
AdvanceSkipWhitespace();
// Allocate a fixed array with all the elements.
Handle<Object> json_array;
const ElementsKind kind = lattice.GetElementsKind();
switch (kind) {
case PACKED_ELEMENTS:
case PACKED_SMI_ELEMENTS: {
Handle<FixedArray> elems =
factory()->NewFixedArray(elements.length(), pretenure_);
for (int i = 0; i < elements.length(); i++) elems->set(i, *elements[i]);
json_array = factory()->NewJSArrayWithElements(elems, kind, pretenure_);
break;
}
case PACKED_DOUBLE_ELEMENTS: {
Handle<FixedDoubleArray> elems = Handle<FixedDoubleArray>::cast(
factory()->NewFixedDoubleArray(elements.length(), pretenure_));
for (int i = 0; i < elements.length(); i++) {
elems->set(i, elements[i]->Number());
}
json_array = factory()->NewJSArrayWithElements(elems, kind, pretenure_);
break;
}
default:
UNREACHABLE();
}
return scope.CloseAndEscape(json_array);
}
ParseJsonArray
这几个方法中对ParseJsonValue进行的递归,来保证一次循环中读取的值是一个完整的值,这个想法非常棒,从而保证了对象的属性放置对象,数字,数组,字符串,null...等等的兼容.
学而时习之,不亦说乎.
第一次写技术博客,难免有所欠缺,
欢迎江湖大侠前来指导.
原文链接: https://www.cnblogs.com/xiaxiaodong/p/8086466.html
欢迎关注
微信关注下方公众号,第一时间获取干货硬货;公众号内回复【pdf】免费获取数百本计算机经典书籍
原创文章受到原创版权保护。转载请注明出处:https://www.ccppcoding.com/archives/266270
非原创文章文中已经注明原地址,如有侵权,联系删除
关注公众号【高性能架构探索】,第一时间获取最新文章
转载文章受原作者版权保护。转载请注明原作者出处!