-
Notifications
You must be signed in to change notification settings - Fork 1
/
Markdown.Sanitizer.js
129 lines (111 loc) · 5 KB
/
Markdown.Sanitizer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
(function () {
var output, Converter;
if (typeof exports === "object" && typeof require === "function") { // we're in a CommonJS (e.g. Node.js) module
output = exports;
Converter = require("./Markdown.Converter").Converter;
} else {
output = window.Markdown;
Converter = output.Converter;
}
output.getSanitizingConverter = function () {
var converter = new Converter();
converter.hooks.addFalse("isValidTag");
converter.hooks.chain("postConversion", function (html) {
return sanitizeHtml(html, converter);
});
converter.hooks.chain("postConversion", balanceTags);
return converter;
}
function sanitizeHtml(html, converter) {
return html.replace(/<[^>]*>?/gi, function (html) {
return sanitizeTag(html, converter);
});
}
// (tags that can be opened/closed) | (tags that stand alone)
var basic_tag_whitelist = /^(<\/?(b|blockquote|code|del|dd|dl|dt|em|h1|h2|h3|i|kbd|li|ol(?: start="\d+")?|p|pre|s|sup|sub|strong|strike|ul)>|<(br|hr)\s?\/?>)$/i;
// <a href="url..." optional title>|</a>
var a_white = /^(<a\shref="((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)*[\]$]+"(\stitle="[^"<>]+")?\s?>|<\/a>)$/i;
// <img src="url..." optional width optional height optional alt optional title
var img_white = /^(<img\ssrc="(https?:\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)*[\]$]+"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$/i;
function sanitizeTag(tag, converter) {
if (tag.match(basic_tag_whitelist) || tag.match(a_white) || tag.match(img_white) || converter.hooks.isValidTag(tag))
return tag;
else {
var anyChange = false;
// if it looks like it *might* be valid, then try percent-encoding illegal characters in the src or href attribute
// and then try the whitelist again -- if that fixes it, then replace the found tag with the fixed one.
var encoded = tag.replace(/^(<a href="|<img src=")([^"]*)/i, function (wholematch, prefix, url) {
return prefix + url.replace(/[^-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)*[\]$]/g, function (c) {
anyChange = true;
if (c == "'") // this is the only character that isn't in our whitelist and that is returned unchanged by encodeURIComponent()
return "%27";
else
return encodeURIComponent(c);
});
});
if (anyChange && (encoded.match(a_white) || encoded.match(img_white)))
return encoded;
}
return "";
}
/// <summary>
/// attempt to balance HTML tags in the html string
/// by removing any unmatched opening or closing tags
/// IMPORTANT: we *assume* HTML has *already* been
/// sanitized and is safe/sane before balancing!
///
/// adapted from CODESNIPPET: A8591DBA-D1D3-11DE-947C-BA5556D89593
/// </summary>
function balanceTags(html) {
if (html == "")
return "";
var re = /<\/?\w+[^>]*(\s|$|>)/g;
// convert everything to lower case; this makes
// our case insensitive comparisons easier
var tags = html.toLowerCase().match(re);
// no HTML tags present? nothing to do; exit now
var tagcount = (tags || []).length;
if (tagcount == 0)
return html;
var tagname, tag;
var ignoredtags = "<p><img><br><li><hr>";
var match;
var tagpaired = [];
var tagremove = [];
var needsRemoval = false;
// loop through matched tags in forward order
for (var ctag = 0; ctag < tagcount; ctag++) {
tagname = tags[ctag].replace(/<\/?(\w+).*/, "$1");
// skip any already paired tags
// and skip tags in our ignore list; assume they're self-closed
if (tagpaired[ctag] || ignoredtags.search("<" + tagname + ">") > -1)
continue;
tag = tags[ctag];
match = -1;
if (!/^<\//.test(tag)) {
// this is an opening tag
// search forwards (next tags), look for closing tags
for (var ntag = ctag + 1; ntag < tagcount; ntag++) {
if (!tagpaired[ntag] && tags[ntag] == "</" + tagname + ">") {
match = ntag;
break;
}
}
}
if (match == -1)
needsRemoval = tagremove[ctag] = true; // mark for removal
else
tagpaired[match] = true; // mark paired
}
if (!needsRemoval)
return html;
// delete all orphaned tags from the string
var ctag = 0;
html = html.replace(re, function (match) {
var res = tagremove[ctag] ? "" : match;
ctag++;
return res;
});
return html;
}
})();