1:
37:
38: package ;
39:
40: import ;
41: import ;
42: import ;
43: import ;
44: import ;
45: import ;
46: import ;
47: import ;
48: import ;
49: import ;
50: import ;
51: import ;
52: import ;
53: import ;
54: import ;
55: import ;
56: import ;
57: import ;
58: import ;
59: import ;
60: import ;
61:
62:
67: public class StreamSerializer
68: {
69:
70: static final int SPACE = 0x20;
71: static final int BANG = 0x21;
72: static final int APOS = 0x27;
73: static final int SLASH = 0x2f;
74: static final int BRA = 0x3c;
75: static final int KET = 0x3e;
76: static final int EQ = 0x3d;
77:
78:
/**
 * HTML boolean attributes.  Maps an element name to the set of attribute
 * names that are boolean on that element.  Elements listed together in a
 * row below share one and the same set instance.
 */
static final Map HTML_BOOLEAN_ATTRIBUTES = new HashMap();
static
{
  // One row per distinct attribute set: the first string lists the
  // elements (space separated), the second the attribute names.
  final String[][] rows =
    {
      { "area", "nohref" },
      { "img", "ismap" },
      { "object", "declare" },
      { "hr", "noshade" },
      { "dl ol ul dir menu", "compact" },
      { "input", "checked disabled readonly ismap" },
      { "select", "multiple disabled" },
      { "optgroup", "disabled" },
      { "option", "selected disabled" },
      { "textarea", "disabled readonly" },
      { "button", "disabled" },
      { "th td", "nowrap" },
      { "frame", "noresize" },
      { "script", "defer" }
    };
  for (int r = 0; r < rows.length; r++)
    {
      final String[] elements = rows[r][0].split(" ");
      final String[] attributes = rows[r][1].split(" ");
      final HashSet names = new HashSet();
      for (int a = 0; a < attributes.length; a++)
        names.add(attributes[a]);
      for (int e = 0; e < elements.length; e++)
        HTML_BOOLEAN_ATTRIBUTES.put(elements[e], names);
    }
}
153:
154:
155: static final HashSet HTML_URIS = new HashSet();
156: static {
157: HTML_URIS.add("http://www.w3.org/1999/xhtml");
158: }
159:
160: protected final String encoding;
161: final Charset charset;
162: final CharsetEncoder encoder;
163: final int mode;
164: final LinkedList namespaces;
165: protected String eol;
166: Collection cdataSectionElements = Collections.EMPTY_SET;
167:
168: protected boolean discardDefaultContent;
169: protected boolean xmlDeclaration = true;
170:
171:
172: private boolean htmlEncoded;
173:
174: public StreamSerializer()
175: {
176: this(Stylesheet.OUTPUT_XML, null, null);
177: }
178:
179: public StreamSerializer(String encoding)
180: {
181: this(Stylesheet.OUTPUT_XML, encoding, null);
182: }
183:
184: public StreamSerializer(int mode, String encoding, String eol)
185: {
186: this.mode = mode;
187: if (encoding == null)
188: encoding = (mode == Stylesheet.OUTPUT_HTML) ? "ISO-8859-1" : "UTF-8";
189: this.encoding = encoding.intern();
190: charset = Charset.forName(this.encoding);
191: encoder = charset.newEncoder();
192: this.eol = (eol != null) ? eol : System.getProperty("line.separator");
193: namespaces = new LinkedList();
194: }
195:
196: void setCdataSectionElements(Collection c)
197: {
198: cdataSectionElements = c;
199: }
200:
201: public void serialize(final Node node, final OutputStream out)
202: throws IOException
203: {
204: serialize(node, out, false);
205: }
206:
207: void serialize(Node node, final OutputStream out,
208: boolean convertToCdata)
209: throws IOException
210: {
211: while (node != null)
212: {
213: Node next = node.getNextSibling();
214: doSerialize(node, out, convertToCdata);
215: node = next;
216: }
217: }
218:
219: private void doSerialize(final Node node, final OutputStream out,
220: boolean convertToCdata)
221: throws IOException
222: {
223: if (out == null)
224: throw new NullPointerException("no output stream");
225: htmlEncoded = false;
226: String value, prefix;
227: Node children;
228: String uri = node.getNamespaceURI();
229: short nt = node.getNodeType();
230: if (convertToCdata && nt == Node.TEXT_NODE)
231: nt = Node.CDATA_SECTION_NODE;
232: switch (nt)
233: {
234: case Node.ATTRIBUTE_NODE:
235: prefix = node.getPrefix();
236: if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(uri) ||
237: XMLConstants.XMLNS_ATTRIBUTE.equals(prefix) ||
238: (prefix != null && prefix.startsWith("xmlns:")))
239: {
240: String nsuri = node.getNodeValue();
241: if (isDefined(nsuri, prefix))
242: break;
243: String name = node.getLocalName();
244: if (name == null)
245: {
246:
247: name = node.getNodeName();
248: int ci = name.indexOf(':');
249: if (ci != -1)
250: name = name.substring(ci + 1);
251: }
252: define(nsuri, name);
253: }
254: else if (uri != null && !isDefined(uri, prefix))
255: {
256: prefix = define(uri, prefix);
257: String nsname = (prefix == null) ? "xmlns" : "xmlns:" + prefix;
258: out.write(SPACE);
259: out.write(encodeText(nsname));
260: out.write(EQ);
261: String nsvalue = "\"" + encode(uri, true, true) + "\"";
262: out.write(nsvalue.getBytes(encoding));
263: }
264: out.write(SPACE);
265: String a_nodeName = node.getNodeName();
266: out.write(encodeText(a_nodeName));
267: String a_nodeValue = node.getNodeValue();
268: if (mode == Stylesheet.OUTPUT_HTML &&
269: a_nodeName.equals(a_nodeValue) &&
270: isHTMLBoolean((Attr) node, a_nodeName))
271: break;
272: out.write(EQ);
273: value = "\"" + encode(a_nodeValue, true, true) + "\"";
274: out.write(encodeText(value));
275: break;
276: case Node.ELEMENT_NODE:
277: pushNamespaceContext();
278: value = node.getNodeName();
279: out.write(BRA);
280: out.write(encodeText(value));
281: prefix = node.getPrefix();
282: if (uri != null && !isDefined(uri, prefix))
283: {
284: prefix = define(uri, prefix);
285: String nsname = (prefix == null) ? "xmlns" : "xmlns:" + prefix;
286: out.write(SPACE);
287: out.write(encodeText(nsname));
288: out.write(EQ);
289: String nsvalue = "\"" + encode(uri, true, true) + "\"";
290: out.write(encodeText(nsvalue));
291: }
292: NamedNodeMap attrs = node.getAttributes();
293: if (attrs != null)
294: {
295: int len = attrs.getLength();
296: for (int i = 0; i < len; i++)
297: {
298: Attr attr = (Attr) attrs.item(i);
299: if (discardDefaultContent && !attr.getSpecified())
300: {
301:
302: }
303: else
304: serialize(attr, out, false);
305: }
306: }
307: convertToCdata = cdataSectionElements.contains(value);
308: children = node.getFirstChild();
309: if (children == null)
310: {
311: out.write(SLASH);
312: out.write(KET);
313: }
314: else
315: {
316: out.write(KET);
317: serialize(children, out, convertToCdata);
318: out.write(BRA);
319: out.write(SLASH);
320: out.write(encodeText(value));
321: out.write(KET);
322: }
323: popNamespaceContext();
324: break;
325: case Node.TEXT_NODE:
326: value = node.getNodeValue();
327: if (!"yes".equals(node.getUserData("disable-output-escaping")) &&
328: mode != Stylesheet.OUTPUT_TEXT)
329: value = encode(value, false, false);
330: out.write(encodeText(value));
331: break;
332: case Node.CDATA_SECTION_NODE:
333: value = node.getNodeValue();
334:
335:
336: int bbk = value.indexOf("]]>");
337: while (bbk != -1)
338: {
339: String head = value.substring(0, bbk + 2);
340: out.write(encodeText("<![CDATA[" + head + "]]>"));
341: value = value.substring(bbk + 2);
342: bbk = value.indexOf("]]>");
343: }
344:
345: out.write(encodeText("<![CDATA[" + value + "]]>"));
346: break;
347: case Node.COMMENT_NODE:
348: value = "<!--" + node.getNodeValue() + "-->";
349: out.write(encodeText(value));
350: Node cp = node.getParentNode();
351: if (cp != null && cp.getNodeType() == Node.DOCUMENT_NODE)
352: out.write(encodeText(eol));
353: break;
354: case Node.DOCUMENT_NODE:
355: case Node.DOCUMENT_FRAGMENT_NODE:
356: if (mode == Stylesheet.OUTPUT_XML)
357: {
358: if ("UTF-16".equalsIgnoreCase(encoding))
359: {
360: out.write(0xfe);
361: out.write(0xff);
362: }
363: if (!"yes".equals(node.getUserData("omit-xml-declaration")) &&
364: xmlDeclaration)
365: {
366: Document doc = (node instanceof Document) ?
367: (Document) node : null;
368: String version = (doc != null) ? doc.getXmlVersion() : null;
369: if (version == null)
370: version = (String) node.getUserData("version");
371: if (version == null)
372: version = "1.0";
373: out.write(BRA);
374: out.write(0x3f);
375: out.write("xml version=\"".getBytes("US-ASCII"));
376: out.write(version.getBytes("US-ASCII"));
377: out.write(0x22);
378: if (!("UTF-8".equalsIgnoreCase(encoding)))
379: {
380: out.write(" encoding=\"".getBytes("US-ASCII"));
381: out.write(encoding.getBytes("US-ASCII"));
382: out.write(0x22);
383: }
384: if ((doc != null && doc.getXmlStandalone()) ||
385: "yes".equals(node.getUserData("standalone")))
386: out.write(" standalone=\"yes\"".getBytes("US-ASCII"));
387: out.write(0x3f);
388: out.write(KET);
389: out.write(encodeText(eol));
390: }
391:
392:
393: }
394: else if (mode == Stylesheet.OUTPUT_HTML)
395: {
396:
397: String mediaType = (String) node.getUserData("media-type");
398: if (mediaType == null)
399: mediaType = "text/html";
400: String contentType = mediaType + "; charset=" +
401: ((encoding.indexOf(' ') != -1) ?
402: "\"" + encoding + "\"" :
403: encoding);
404: Document doc = (node instanceof Document) ? (Document) node :
405: node.getOwnerDocument();
406: Node html = null;
407: for (Node ctx = node.getFirstChild(); ctx != null;
408: ctx = ctx.getNextSibling())
409: {
410: if (ctx.getNodeType() == Node.ELEMENT_NODE &&
411: isHTMLElement(ctx, "html"))
412: {
413: html = ctx;
414: break;
415: }
416: }
417: if (html != null)
418: {
419: Node head = null;
420: for (Node ctx = html.getFirstChild(); ctx != null;
421: ctx = ctx.getNextSibling())
422: {
423: if (isHTMLElement(ctx, "head"))
424: {
425: head = ctx;
426: break;
427: }
428: }
429: if (head != null)
430: {
431: Node meta = null;
432: Node metaContent = null;
433: for (Node ctx = head.getFirstChild(); ctx != null;
434: ctx = ctx.getNextSibling())
435: {
436: if (isHTMLElement(ctx, "meta"))
437: {
438: NamedNodeMap metaAttrs = ctx.getAttributes();
439: int len = metaAttrs.getLength();
440: String httpEquiv = null;
441: Node content = null;
442: for (int i = 0; i < len; i++)
443: {
444: Node attr = metaAttrs.item(i);
445: String attrName = attr.getNodeName();
446: if ("http-equiv".equalsIgnoreCase(attrName))
447: httpEquiv = attr.getNodeValue();
448: else if ("content".equalsIgnoreCase(attrName))
449: content = attr;
450: }
451: if ("Content-Type".equalsIgnoreCase(httpEquiv))
452: {
453: meta = ctx;
454: metaContent = content;
455: break;
456: }
457: }
458: }
459: if (meta == null)
460: {
461: meta = doc.createElement("meta");
462:
463: Node first = head.getFirstChild();
464: if (first == null)
465: head.appendChild(meta);
466: else
467: head.insertBefore(meta, first);
468: Node metaHttpEquiv = doc.createAttribute("http-equiv");
469: meta.getAttributes().setNamedItem(metaHttpEquiv);
470: metaHttpEquiv.setNodeValue("Content-Type");
471: }
472: if (metaContent == null)
473: {
474: metaContent = doc.createAttribute("content");
475: meta.getAttributes().setNamedItem(metaContent);
476: }
477: metaContent.setNodeValue(contentType);
478: htmlEncoded = true;
479: }
480: }
481: }
482: children = node.getFirstChild();
483: if (children != null)
484: serialize(children, out, convertToCdata);
485: break;
486: case Node.DOCUMENT_TYPE_NODE:
487: DocumentType doctype = (DocumentType) node;
488: out.write(BRA);
489: out.write(BANG);
490: out.write(encodeText("DOCTYPE "));
491: value = doctype.getNodeName();
492: out.write(encodeText(value));
493: String publicId = doctype.getPublicId();
494: if (publicId != null)
495: {
496: out.write(encodeText(" PUBLIC "));
497: out.write(APOS);
498: out.write(encodeText(publicId));
499: out.write(APOS);
500: }
501: String systemId = doctype.getSystemId();
502: if (systemId != null)
503: {
504: out.write(encodeText(" SYSTEM "));
505: out.write(APOS);
506: out.write(encodeText(systemId));
507: out.write(APOS);
508: }
509: String internalSubset = doctype.getInternalSubset();
510: if (internalSubset != null)
511: {
512: out.write(encodeText(internalSubset));
513: }
514: out.write(KET);
515: out.write(eol.getBytes(encoding));
516: break;
517: case Node.ENTITY_REFERENCE_NODE:
518: value = "&" + node.getNodeValue() + ";";
519: out.write(encodeText(value));
520: break;
521: case Node.PROCESSING_INSTRUCTION_NODE:
522: value = "<?" + node.getNodeName() + " " + node.getNodeValue() + "?>";
523: out.write(encodeText(value));
524: Node pp = node.getParentNode();
525: if (pp != null && pp.getNodeType() == Node.DOCUMENT_NODE)
526: {
527: out.write(encodeText(eol));
528: }
529: break;
530: default:
531: System.err.println("Unhandled node type: "+nt);
532: }
533: }
534:
535: boolean isHTMLElement(Node node, String name)
536: {
537: if (node.getNodeType() != Node.ELEMENT_NODE)
538: return false;
539: String localName = node.getLocalName();
540: if (localName == null)
541: localName = node.getNodeName();
542: if (!name.equalsIgnoreCase(localName))
543: return false;
544: String uri = node.getNamespaceURI();
545: return (uri == null || HTML_URIS.contains(uri));
546: }
547:
548: boolean isDefined(String uri, String prefix)
549: {
550: if (XMLConstants.XML_NS_URI.equals(uri))
551: return "xml".equals(prefix);
552: if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(uri))
553: return "xmlns".equals(prefix);
554: if (prefix == null)
555: prefix = "";
556: for (Iterator i = namespaces.iterator(); i.hasNext(); )
557: {
558: Map ctx = (Map) i.next();
559: String val = (String) ctx.get(uri);
560: if (val != null && val.equals(prefix))
561: return true;
562: }
563: return false;
564: }
565:
566: void pushNamespaceContext()
567: {
568: namespaces.addFirst(new HashMap());
569: }
570:
571: String define(String uri, String prefix)
572: {
573: if (namespaces.isEmpty())
574: return prefix;
575: HashMap ctx = (HashMap) namespaces.getFirst();
576: while (ctx.containsValue(prefix))
577: {
578:
579: prefix = prefix + "_";
580: }
581: ctx.put(uri, prefix);
582: return prefix;
583: }
584:
585: void popNamespaceContext()
586: {
587: namespaces.removeFirst();
588: }
589:
590: final byte[] encodeText(String text)
591: throws IOException
592: {
593: encoder.reset();
594: boolean htmlNeedingEncoding =
595: (mode == Stylesheet.OUTPUT_HTML && !htmlEncoded);
596: if (!encoder.canEncode(text) || htmlNeedingEncoding)
597: {
598:
599: StringBuffer buf = new StringBuffer();
600: int len = text.length();
601: for (int i = 0; i < len; i++)
602: {
603: char c = text.charAt(i);
604: if (!encoder.canEncode(c))
605: {
606:
607: String hex = Integer.toHexString((int) c);
608: buf.append("&#x");
609: buf.append(hex);
610: buf.append(';');
611: }
612: else if (htmlNeedingEncoding)
613: {
614: String entityName = getHTMLCharacterEntity(c);
615: if (entityName != null)
616: {
617: buf.append('&');
618: buf.append(entityName);
619: buf.append(';');
620: }
621: else
622: buf.append(c);
623: }
624: else
625: buf.append(c);
626: }
627: text = buf.toString();
628: }
629: ByteBuffer encoded = encoder.encode(CharBuffer.wrap(text));
630: int len = encoded.limit() - encoded.position();
631: if (encoded.hasArray())
632: {
633: byte[] ret = encoded.array();
634: if (ret.length > len)
635: {
636:
637: byte[] ret2 = new byte[len];
638: System.arraycopy(ret, 0, ret2, 0, len);
639: ret = ret2;
640: }
641: return ret;
642: }
643: encoded.flip();
644: byte[] ret = new byte[len];
645: encoded.get(ret, 0, len);
646: return ret;
647: }
648:
649: String encode(String text, boolean encodeCtl, boolean inAttr)
650: {
651: int len = text.length();
652: StringBuffer buf = null;
653: for (int i = 0; i < len; i++)
654: {
655: char c = text.charAt(i);
656: if (c == '<')
657: {
658: if (buf == null)
659: buf = new StringBuffer(text.substring(0, i));
660: buf.append("<");
661: }
662: else if (c == '>')
663: {
664: if (buf == null)
665: buf = new StringBuffer(text.substring(0, i));
666: buf.append(">");
667: }
668: else if (c == '&')
669: {
670: if (mode == Stylesheet.OUTPUT_HTML && (i + 1) < len &&
671: text.charAt(i + 1) == '{')
672: {
673: if (buf != null)
674: buf.append(c);
675: }
676: else
677: {
678: if (buf == null)
679: buf = new StringBuffer(text.substring(0, i));
680: buf.append("&");
681: }
682: }
683: else if (c == '\'' && inAttr)
684: {
685: if (buf == null)
686: buf = new StringBuffer(text.substring(0, i));
687: if (mode == Stylesheet.OUTPUT_HTML)
688:
689: buf.append("'");
690: else
691: buf.append("'");
692: }
693: else if (c == '"' && inAttr)
694: {
695: if (buf == null)
696: buf = new StringBuffer(text.substring(0, i));
697: buf.append(""");
698: }
699: else if (encodeCtl)
700: {
701: if (c < 0x20)
702: {
703: if (buf == null)
704: buf = new StringBuffer(text.substring(0, i));
705: buf.append('&');
706: buf.append('#');
707: buf.append((int) c);
708: buf.append(';');
709: }
710: else if (buf != null)
711: buf.append(c);
712: }
713: else if (buf != null)
714: buf.append(c);
715: }
716: return (buf == null) ? text : buf.toString();
717: }
718:
719: String toString(Node node)
720: {
721: ByteArrayOutputStream out = new ByteArrayOutputStream();
722: try
723: {
724: serialize(node, out);
725: return new String(out.toByteArray(), encoding);
726: }
727: catch (IOException e)
728: {
729: throw new RuntimeException(e.getMessage());
730: }
731: }
732:
733: boolean isHTMLBoolean(Attr attr, String attrName)
734: {
735: attrName = attrName.toLowerCase();
736: Node element = attr.getOwnerElement();
737: String elementName = element.getLocalName();
738: if (elementName == null)
739: {
740: elementName = element.getNodeName();
741: }
742: elementName = elementName.toLowerCase();
743: Collection attributes =
744: (Collection) HTML_BOOLEAN_ATTRIBUTES.get(elementName);
745: return (attributes != null && attributes.contains(attrName));
746: }
747:
748: static String getHTMLCharacterEntity(char c)
749: {
750:
751: switch (c)
752: {
753: case 160: return "nbsp";
754: case 161: return "iexcl";
755: case 162: return "cent";
756: case 163: return "pound";
757: case 164: return "curren";
758: case 165: return "yen";
759: case 166: return "brvbar";
760: case 167: return "sect";
761: case 168: return "uml";
762: case 169: return "copy";
763: case 170: return "ordf";
764: case 171: return "laquo";
765: case 172: return "not";
766: case 173: return "shy";
767: case 174: return "reg";
768: case 175: return "macr";
769: case 176: return "deg";
770: case 177: return "plusmn";
771: case 178: return "sup2";
772: case 179: return "sup3";
773: case 180: return "acute";
774: case 181: return "micro";
775: case 182: return "para";
776: case 183: return "middot";
777: case 184: return "cedil";
778: case 185: return "sup1";
779: case 186: return "ordm";
780: case 187: return "raquo";
781: case 188: return "frac14";
782: case 189: return "frac12";
783: case 190: return "frac34";
784: case 191: return "iquest";
785: case 192: return "Agrave";
786: case 193: return "Aacute";
787: case 194: return "Acirc";
788: case 195: return "Atilde";
789: case 196: return "Auml";
790: case 197: return "Aring";
791: case 198: return "AElig";
792: case 199: return "Ccedil";
793: case 200: return "Egrave";
794: case 201: return "Eacute";
795: case 202: return "Ecirc";
796: case 203: return "Euml";
797: case 204: return "Igrave";
798: case 205: return "Iacute";
799: case 206: return "Icirc";
800: case 207: return "Iuml";
801: case 208: return "ETH";
802: case 209: return "Ntilde";
803: case 210: return "Ograve";
804: case 211: return "Oacute";
805: case 212: return "Ocirc";
806: case 213: return "Otilde";
807: case 214: return "Ouml";
808: case 215: return "times";
809: case 216: return "Oslash";
810: case 217: return "Ugrave";
811: case 218: return "Uacute";
812: case 219: return "Ucirc";
813: case 220: return "Uuml";
814: case 221: return "Yacute";
815: case 222: return "THORN";
816: case 223: return "szlig";
817: case 224: return "agrave";
818: case 225: return "aacute";
819: case 226: return "acirc";
820: case 227: return "atilde";
821: case 228: return "auml";
822: case 229: return "aring";
823: case 230: return "aelig";
824: case 231: return "ccedil";
825: case 232: return "egrave";
826: case 233: return "eacute";
827: case 234: return "ecirc";
828: case 235: return "euml";
829: case 236: return "igrave";
830: case 237: return "iacute";
831: case 238: return "icirc";
832: case 239: return "iuml";
833: case 240: return "eth";
834: case 241: return "ntilde";
835: case 242: return "ograve";
836: case 243: return "oacute";
837: case 244: return "ocirc";
838: case 245: return "otilde";
839: case 246: return "ouml";
840: case 247: return "divide";
841: case 248: return "oslash";
842: case 249: return "ugrave";
843: case 250: return "uacute";
844: case 251: return "ucirc";
845: case 252: return "uuml";
846: case 253: return "yacute";
847: case 254: return "thorn";
848: case 255: return "yuml";
849: default: return null;
850: }
851: }
852:
853: }