1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
(function(global) {
const DEBUG = false;
var debug = DEBUG ? console.log.bind(console) : function(){};
if (typeof module === 'object' && typeof module.exports === 'object') {
require('./lib/htmlparser.js');
}
function q(v) {
return '"' + v + '"';
}
function removeDOCTYPE(html) {
return html
.replace(/<\?xml.*\?>\n/, '')
.replace(/<!doctype.*\>\n/, '')
.replace(/<!DOCTYPE.*\>\n/, '');
}
global.html2json = function html2json(html) {
html = removeDOCTYPE(html);
var bufArray = [];
var results = {
node: 'root',
child: [],
};
HTMLParser(html, {
start: function(tag, attrs, unary) {
debug(tag, attrs, unary);
// node for this element
var node = {
node: 'element',
tag: tag,
};
if (attrs.length !== 0) {
node.attr = attrs.reduce(function(pre, attr) {
var name = attr.name;
var value = attr.value;
// has multi attibutes
// make it array of attribute
if (value.match(/ /)) {
value = value.split(' ');
}
// if attr already exists
// merge it
if (pre[name]) {
if (Array.isArray(pre[name])) {
// already array, push to last
pre[name].push(value);
} else {
// single value, make it array
pre[name] = [pre[name], value];
}
} else {
// not exist, put it
pre[name] = value;
}
return pre;
}, {});
}
if (unary) {
// if this tag dosen't have end tag
// like <img src="hoge.png"/>
// add to parents
var parent = bufArray[0] || results;
if (parent.child === undefined) {
parent.child = [];
}
parent.child.push(node);
} else {
bufArray.unshift(node);
}
},
end: function(tag) {
debug(tag);
// merge into parent tag
var node = bufArray.shift();
if (node.tag !== tag) console.error('invalid state: mismatch end tag');
if (bufArray.length === 0) {
results.child.push(node);
} else {
var parent = bufArray[0];
if (parent.child === undefined) {
parent.child = [];
}
parent.child.push(node);
}
},
chars: function(text) {
debug(text);
var node = {
node: 'text',
text: text,
};
if (bufArray.length === 0) {
results.child.push(node);
} else {
var parent = bufArray[0];
if (parent.child === undefined) {
parent.child = [];
}
parent.child.push(node);
}
},
comment: function(text) {
debug(text);
var node = {
node: 'comment',
text: text,
};
var parent = bufArray[0];
if (parent.child === undefined) {
parent.child = [];
}
parent.child.push(node);
},
});
return results;
};
global.json2html = function json2html(json) {
// Empty Elements - HTML 4.01
var empty = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param', 'embed'];
var child = '';
if (json.child) {
child = json.child.map(function(c) {
return json2html(c);
}).join('');
}
var attr = '';
if (json.attr) {
attr = Object.keys(json.attr).map(function(key) {
var value = json.attr[key];
if (Array.isArray(value)) value = value.join(' ');
return key + '=' + q(value);
}).join(' ');
if (attr !== '') attr = ' ' + attr;
}
if (json.node === 'element') {
var tag = json.tag;
if (empty.indexOf(tag) > -1) {
// empty element
return '<' + json.tag + attr + '/>';
}
// non empty element
var open = '<' + json.tag + attr + '>';
var close = '</' + json.tag + '>';
return open + child + close;
}
if (json.node === 'text') {
return json.text;
}
if (json.node === 'comment') {
return '<!--' + json.text + '-->';
}
if (json.node === 'root') {
return child;
}
};
})(this);