1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.kathrynhuxtable.maven.plugins.htmlfiltersite;
19
20 import java.io.IOException;
21 import java.io.Writer;
22 import java.util.List;
23
24 import org.jdom.Attribute;
25 import org.jdom.CDATA;
26 import org.jdom.Comment;
27 import org.jdom.Element;
28 import org.jdom.EntityRef;
29 import org.jdom.Namespace;
30 import org.jdom.ProcessingInstruction;
31 import org.jdom.Text;
32 import org.jdom.Verifier;
33 import org.jdom.output.EscapeStrategy;
34 import org.jdom.output.Format;
35 import org.jdom.output.XMLOutputter;
36
37
38
39
40 public class HTMLOutputter extends XMLOutputter {
41
42 protected Format nonBreakingFormat = Format.getRawFormat();
43 protected EscapeStrategy htmlEscapeStrategy = new HTMLEscapeStrategy();
44
/**
 * Creates an outputter through the superclass's no-argument constructor and
 * installs the HTML escape strategy on both the active format and the
 * non-breaking format.
 */
public HTMLOutputter() {
    super();
    // Both formats must share the same escaping rules.
    this.nonBreakingFormat.setEscapeStrategy(this.htmlEscapeStrategy);
    this.currentFormat.setEscapeStrategy(this.htmlEscapeStrategy);
}
50
51
52
53
54
/**
 * Creates an outputter for the given format and installs the HTML escape
 * strategy on both the active format and the non-breaking format.
 *
 * @param format format handed to the superclass constructor
 */
public HTMLOutputter(Format format) {
    super(format);
    // Both formats must share the same escaping rules.
    this.nonBreakingFormat.setEscapeStrategy(this.htmlEscapeStrategy);
    this.currentFormat.setEscapeStrategy(this.htmlEscapeStrategy);
}
60
61
62
63
64 public void setFormat(Format newFormat) {
65 super.setFormat(newFormat);
66 currentFormat.setEscapeStrategy(htmlEscapeStrategy);
67 }
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82 protected void printElement(Writer out, Element element, int level, NamespaceStack namespaces) throws IOException {
83
84 List<Attribute> attributes = getElementAttributes(element);
85 List<?> content = element.getContent();
86
87
88 String space = null;
89 if (attributes != null) {
90 space = element.getAttributeValue("space", Namespace.XML_NAMESPACE);
91 }
92
93 Format previousFormat = currentFormat;
94 if ("default".equals(space)) {
95 currentFormat = getFormat();
96 } else if ("preserve".equals(space) || isNonBreaking(element)) {
97 currentFormat = nonBreakingFormat;
98 }
99
100
101
102 out.write("<");
103 printQualifiedName(out, element);
104
105
106 int previouslyDeclaredNamespaces = namespaces.size();
107
108
109 printElementNamespace(out, element, namespaces);
110
111
112 printAdditionalNamespaces(out, element, namespaces);
113
114
115 if (attributes != null) printAttributes(out, attributes, element, namespaces);
116
117
118
119
120
121
122 int start = skipLeadingWhite(content, 0);
123 int size = content.size();
124 if (start >= size) {
125
126 if (true || currentFormat.getExpandEmptyElements()) {
127 out.write("></");
128 printQualifiedName(out, element);
129 out.write(">");
130 } else {
131 out.write(" />");
132 }
133 } else {
134 out.write(">");
135
136
137
138
139
140 if (nextNonText(content, start) < size) {
141
142 newline(out);
143 printContentRange(out, content, start, size, level + 1, namespaces);
144 newline(out);
145 indent(out, level);
146 } else {
147
148 printTextRange(out, content, start, size);
149 }
150 out.write("</");
151 printQualifiedName(out, element);
152 out.write(">");
153 }
154
155
156 while (namespaces.size() > previouslyDeclaredNamespaces) {
157 namespaces.pop();
158 }
159
160
161 currentFormat = previousFormat;
162 }
163
164 @SuppressWarnings("unchecked")
165 private List<Attribute> getElementAttributes(Element element) {
166 return (List<Attribute>) element.getAttributes();
167 }
168
169 private boolean isNonBreaking(Element element) {
170 String eName = element.getName();
171
172 if ("span".equals(eName) || "p".equals(eName) || "li".equals(eName) || "h1".equals(eName) || "h2".equals(eName)
173 || "h3".equals(eName) || "caption".equals(eName) || "sup".equals(eName) || "sub".equals(eName)) {
174 return true;
175 }
176 return false;
177 }
178
179
180
181
182
183 private void printString(Writer out, String str) throws IOException {
184 if (currentFormat.getTextMode() == Format.TextMode.NORMALIZE) {
185 str = Text.normalizeString(str);
186 } else if (currentFormat.getTextMode() == Format.TextMode.TRIM) {
187 str = str.trim();
188 }
189 out.write(escapeElementEntities(str));
190 }
191
192
193
194
195
196
197
198 private void newline(Writer out) throws IOException {
199 if (currentFormat.getIndent() != null) {
200 out.write(currentFormat.getLineSeparator());
201 }
202 }
203
204
205
206
207
208
209
210
211
212 private void indent(Writer out, int level) throws IOException {
213 if (currentFormat.getIndent() == null || currentFormat.getIndent().equals("")) {
214 return;
215 }
216
217 for (int i = 0; i < level; i++) {
218 out.write(currentFormat.getIndent());
219 }
220 }
221
222
223
224
225
226 private int skipLeadingWhite(List<?> content, int start) {
227 if (start < 0) {
228 start = 0;
229 }
230
231 int index = start;
232 int size = content.size();
233 if (currentFormat.getTextMode() == Format.TextMode.TRIM_FULL_WHITE
234 || currentFormat.getTextMode() == Format.TextMode.NORMALIZE || currentFormat.getTextMode() == Format.TextMode.TRIM) {
235 while (index < size) {
236 if (!isAllWhitespace(content.get(index))) {
237 return index;
238 }
239 index++;
240 }
241 }
242 return index;
243 }
244
245
246
247
248
249 private int skipTrailingWhite(List<?> content, int start) {
250 int size = content.size();
251 if (start > size) {
252 start = size;
253 }
254
255 int index = start;
256 if (currentFormat.getTextMode() == Format.TextMode.TRIM_FULL_WHITE
257 || currentFormat.getTextMode() == Format.TextMode.NORMALIZE || currentFormat.getTextMode() == Format.TextMode.TRIM) {
258 while (index >= 0) {
259 if (!isAllWhitespace(content.get(index - 1))) break;
260 --index;
261 }
262 }
263 return index;
264 }
265
266
267
268
269
270 private static int nextNonText(List<?> content, int start) {
271 if (start < 0) {
272 start = 0;
273 }
274
275 int index = start;
276 int size = content.size();
277 while (index < size) {
278 Object node = content.get(index);
279 if (!((node instanceof Text) || (node instanceof EntityRef))) {
280 return index;
281 }
282 index++;
283 }
284 return size;
285 }
286
287
288 private boolean isAllWhitespace(Object obj) {
289 String str = null;
290
291 if (obj instanceof String) {
292 str = (String) obj;
293 } else if (obj instanceof Text) {
294 str = ((Text) obj).getText();
295 } else if (obj instanceof EntityRef) {
296 return false;
297 } else {
298 return false;
299 }
300
301 for (int i = 0; i < str.length(); i++) {
302 if (!Verifier.isXMLWhitespace(str.charAt(i))) return false;
303 }
304 return true;
305 }
306
307
308 private boolean startsWithWhite(String str) {
309 if ((str != null) && (str.length() > 0) && Verifier.isXMLWhitespace(str.charAt(0))) {
310 return true;
311 }
312 return false;
313 }
314
315
316 private boolean endsWithWhite(String str) {
317 if ((str != null) && (str.length() > 0) && Verifier.isXMLWhitespace(str.charAt(str.length() - 1))) {
318 return true;
319 }
320 return false;
321 }
322
323
324
325 private void printQualifiedName(Writer out, Element e) throws IOException {
326 if (e.getNamespace().getPrefix().length() == 0) {
327 out.write(e.getName());
328 } else {
329 out.write(e.getNamespace().getPrefix());
330 out.write(':');
331 out.write(e.getName());
332 }
333 }
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353 private void printContentRange(Writer out, List<?> content, int start, int end, int level, NamespaceStack namespaces)
354 throws IOException {
355 boolean firstNode;
356 Object next;
357 int first, index;
358
359 index = start;
360 while (index < end) {
361 firstNode = (index == start) ? true : false;
362 next = content.get(index);
363
364
365
366
367
368 if ((next instanceof Text) || (next instanceof EntityRef)) {
369 first = skipLeadingWhite(content, index);
370
371 index = nextNonText(content, first);
372
373
374 if (first < index) {
375 if (!firstNode) newline(out);
376 indent(out, level);
377 printTextRange(out, content, first, index);
378 }
379 continue;
380 }
381
382
383
384
385 if (!firstNode) {
386 newline(out);
387 }
388
389 indent(out, level);
390
391 if (next instanceof Comment) {
392 printComment(out, (Comment) next);
393 } else if (next instanceof Element) {
394 printElement(out, (Element) next, level, namespaces);
395 } else if (next instanceof ProcessingInstruction) {
396 printProcessingInstruction(out, (ProcessingInstruction) next);
397 } else {
398
399
400
401 }
402
403 index++;
404 }
405 }
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421 private void printTextRange(Writer out, List<?> content, int start, int end) throws IOException {
422 String previous;
423 Object node;
424 String next;
425
426 previous = null;
427
428
429 start = skipLeadingWhite(content, start);
430
431 int size = content.size();
432 if (start < size) {
433
434 end = skipTrailingWhite(content, end);
435
436 for (int i = start; i < end; i++) {
437 node = content.get(i);
438
439
440
441 if (node instanceof Text) {
442 next = ((Text) node).getText();
443 } else if (node instanceof EntityRef) {
444 next = "&" + ((EntityRef) node).getValue() + ";";
445 } else {
446 throw new IllegalStateException("Should see only " + "CDATA, Text, or EntityRef");
447 }
448
449
450 if (next == null || "".equals(next)) {
451 continue;
452 }
453
454
455
456 if (previous != null) {
457 if (currentFormat.getTextMode() == Format.TextMode.NORMALIZE
458 || currentFormat.getTextMode() == Format.TextMode.TRIM) {
459 if ((endsWithWhite(previous)) || (startsWithWhite(next))) {
460 out.write(" ");
461 }
462 }
463 }
464
465
466 if (node instanceof CDATA) {
467 printCDATA(out, (CDATA) node);
468 } else if (node instanceof EntityRef) {
469 printEntityRef(out, (EntityRef) node);
470 } else {
471 printString(out, next);
472 }
473
474 previous = next;
475 }
476 }
477 }
478
479
480
481
482
483
484
485
486
487
488 private void printNamespace(Writer out, Namespace ns, NamespaceStack namespaces) throws IOException {
489 String prefix = ns.getPrefix();
490 String uri = ns.getURI();
491
492
493 if (uri.equals(namespaces.getURI(prefix))) {
494 return;
495 }
496
497 out.write(" xmlns");
498 if (!prefix.equals("")) {
499 out.write(":");
500 out.write(prefix);
501 }
502 out.write("=\"");
503 out.write(escapeAttributeEntities(uri));
504 out.write("\"");
505 namespaces.push(ns);
506 }
507
508 private void printElementNamespace(Writer out, Element element, NamespaceStack namespaces) throws IOException {
509
510
511
512
513 Namespace ns = element.getNamespace();
514 if (ns == Namespace.XML_NAMESPACE) {
515 return;
516 }
517 if (!((ns == Namespace.NO_NAMESPACE) && (namespaces.getURI("") == null))) {
518 printNamespace(out, ns, namespaces);
519 }
520 }
521
522 private void printAdditionalNamespaces(Writer out, Element element, NamespaceStack namespaces) throws IOException {
523 List<Namespace> list = getElementAdditionalNamespaces(element);
524 if (list != null) {
525 for (int i = 0; i < list.size(); i++) {
526 Namespace additional = list.get(i);
527 printNamespace(out, additional, namespaces);
528 }
529 }
530 }
531
532 @SuppressWarnings("unchecked")
533 private List<Namespace> getElementAdditionalNamespaces(Element element) {
534 return (List<Namespace>) element.getAdditionalNamespaces();
535 }
536
537 public class HTMLEscapeStrategy implements EscapeStrategy {
538 public boolean shouldEscape(char ch) {
539
540
541
542 if (ch < ' ' || ch > 127) {
543 return true;
544 } else {
545 return false;
546 }
547 }
548 }
549 }