Categories:
Audio (13)
Biotech (29)
Bytecode (36)
Database (77)
Framework (7)
Game (7)
General (507)
Graphics (53)
I/O (35)
IDE (2)
JAR Tools (101)
JavaBeans (21)
JDBC (121)
JDK (426)
JSP (20)
Logging (108)
Mail (58)
Messaging (8)
Network (84)
PDF (97)
Report (7)
Scripting (84)
Security (32)
Server (121)
Servlet (26)
SOAP (24)
Testing (54)
Web (15)
XML (309)
Collections:
Other Resources:
Woodstox 6.4.0 - Source Code Files
Woodstox 6.4.0 Source Code Files are provided at the Woodstox GitHub Website.
You can download them from the "src/main/java" folder.
You can also browse Woodstox Source Code files below:
✍: FYIcenter
⏎ com/ctc/wstx/io/UTF8Writer.java
package com.ctc.wstx.io; import java.io.*; import com.ctc.wstx.api.WriterConfig; /** * Specialized buffering UTF-8 writer used by * {@link com.ctc.wstx.sw.XmlWriter}. * The main reason for custom version is to allow for efficient * buffer recycling; the second benefit is that encoder has less * overhead for short content encoding (compared to JDK default * codecs). */ public final class UTF8Writer extends Writer implements CompletelyCloseable { private final static int DEFAULT_BUF_LEN = 4000; final static int SURR1_FIRST = 0xD800; final static int SURR1_LAST = 0xDBFF; final static int SURR2_FIRST = 0xDC00; final static int SURR2_LAST = 0xDFFF; final WriterConfig mConfig; final boolean mAutoCloseOutput; final OutputStream mOut; byte[] mOutBuffer; final int mOutBufferLast; int mOutPtr; /** * When outputting chars from BMP, surrogate pairs need to be coalesced. * To do this, both pairs must be known first; and since it is possible * pairs may be split, we need temporary storage for the first half */ int mSurrogate = 0; public UTF8Writer(WriterConfig cfg, OutputStream out, boolean autoclose) { mConfig = cfg; mAutoCloseOutput = autoclose; mOut = out; mOutBuffer = (mConfig == null) ? new byte[DEFAULT_BUF_LEN] : cfg.allocFullBBuffer(DEFAULT_BUF_LEN); /* Max. expansion for a single char (in unmodified UTF-8) is * 4 bytes (or 3 depending on how you view it -- 4 when recombining * surrogate pairs) */ mOutBufferLast = mOutBuffer.length - 4; mOutPtr = 0; } /* //////////////////////////////////////////////////////// // CompletelyCloseable impl //////////////////////////////////////////////////////// */ @Override public void closeCompletely() throws IOException { _close(true); } /* //////////////////////////////////////////////////////// // java.io.Writer implementation //////////////////////////////////////////////////////// */ /* !!! 
30-Nov-2006, TSa: Due to co-variance between Appendable and * Writer, this would not compile with javac 1.5, in 1.4 mode * (source and target set to "1.4". Not a huge deal, but since * the base impl is just fine, no point in overriding it. */ /* public Writer append(char c) throws IOException // note: this is a JDK 1.5 method { write(c); return this; } */ @Override public void close() throws IOException { _close(mAutoCloseOutput); } @Override public void flush() throws IOException { if (mOutPtr > 0 && mOutBuffer != null) { mOut.write(mOutBuffer, 0, mOutPtr); mOutPtr = 0; } mOut.flush(); } @Override public void write(char[] cbuf) throws IOException { write(cbuf, 0, cbuf.length); } @Override public void write(char[] cbuf, int off, int len) throws IOException { if (len < 2) { if (len == 1) { write(cbuf[off]); } return; } // First: do we have a leftover surrogate to deal with? if (mSurrogate > 0) { char second = cbuf[off++]; --len; write(_convertSurrogate(second)); // will have at least one more char } int outPtr = mOutPtr; byte[] outBuf = mOutBuffer; int outBufLast = mOutBufferLast; // has 4 'spare' bytes // All right; can just loop it nice and easy now: len += off; // len will now be the end of input buffer output_loop: for (; off < len; ) { /* First, let's ensure we can output at least 4 bytes * (longest UTF-8 encoded codepoint): */ if (outPtr >= outBufLast) { mOut.write(outBuf, 0, outPtr); outPtr = 0; } int c = cbuf[off++]; // And then see if we have an Ascii char: if (c < 0x80) { // If so, can do a tight inner loop: outBuf[outPtr++] = (byte)c; // Let's calc how many ascii chars we can copy at most: int maxInCount = (len - off); int maxOutCount = (outBufLast - outPtr); if (maxInCount > maxOutCount) { maxInCount = maxOutCount; } maxInCount += off; ascii_loop: while (true) { if (off >= maxInCount) { // done with max. 
ascii seq continue output_loop; } c = cbuf[off++]; if (c >= 0x80) { break ascii_loop; } outBuf[outPtr++] = (byte) c; } } // Nope, multi-byte: if (c < 0x800) { // 2-byte outBuf[outPtr++] = (byte) (0xc0 | (c >> 6)); outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f)); } else { // 3 or 4 bytes // Surrogates? if (c < SURR1_FIRST || c > SURR2_LAST) { outBuf[outPtr++] = (byte) (0xe0 | (c >> 12)); outBuf[outPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f)); outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f)); continue; } // Yup, a surrogate: if (c > SURR1_LAST) { // must be from first range mOutPtr = outPtr; throwIllegal(c); } mSurrogate = c; // and if so, followed by another from next range if (off >= len) { // unless we hit the end? break; } c = _convertSurrogate(cbuf[off++]); if (c > 0x10FFFF) { // illegal, as per RFC 3629 mOutPtr = outPtr; throwIllegal(c); } outBuf[outPtr++] = (byte) (0xf0 | (c >> 18)); outBuf[outPtr++] = (byte) (0x80 | ((c >> 12) & 0x3f)); outBuf[outPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f)); outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f)); } } mOutPtr = outPtr; } @Override public void write(int c) throws IOException { // First; do we have a left over surrogate? if (mSurrogate > 0) { c = _convertSurrogate(c); // If not, do we start with a surrogate? 
} else if (c >= SURR1_FIRST && c <= SURR2_LAST) { // Illegal to get second part without first: if (c > SURR1_LAST) { throwIllegal(c); } // First part just needs to be held for now mSurrogate = c; return; } if (mOutPtr >= mOutBufferLast) { // let's require enough room, first mOut.write(mOutBuffer, 0, mOutPtr); mOutPtr = 0; } if (c < 0x80) { // ascii mOutBuffer[mOutPtr++] = (byte) c; } else { int ptr = mOutPtr; if (c < 0x800) { // 2-byte mOutBuffer[ptr++] = (byte) (0xc0 | (c >> 6)); mOutBuffer[ptr++] = (byte) (0x80 | (c & 0x3f)); } else if (c <= 0xFFFF) { // 3 bytes mOutBuffer[ptr++] = (byte) (0xe0 | (c >> 12)); mOutBuffer[ptr++] = (byte) (0x80 | ((c >> 6) & 0x3f)); mOutBuffer[ptr++] = (byte) (0x80 | (c & 0x3f)); } else { // 4 bytes if (c > 0x10FFFF) { // illegal, as per RFC 3629 throwIllegal(c); } mOutBuffer[ptr++] = (byte) (0xf0 | (c >> 18)); mOutBuffer[ptr++] = (byte) (0x80 | ((c >> 12) & 0x3f)); mOutBuffer[ptr++] = (byte) (0x80 | ((c >> 6) & 0x3f)); mOutBuffer[ptr++] = (byte) (0x80 | (c & 0x3f)); } mOutPtr = ptr; } } @Override public void write(String str) throws IOException { write(str, 0, str.length()); } @Override public void write(String str, int off, int len) throws IOException { if (len < 2) { if (len == 1) { write(str.charAt(off)); } return; } // First: do we have a leftover surrogate to deal with? 
if (mSurrogate > 0) { char second = str.charAt(off++); --len; write(_convertSurrogate(second)); // will have at least one more char (case of 1 char was checked earlier on) } int outPtr = mOutPtr; byte[] outBuf = mOutBuffer; int outBufLast = mOutBufferLast; // has 4 'spare' bytes // All right; can just loop it nice and easy now: len += off; // len will now be the end of input buffer output_loop: for (; off < len; ) { /* First, let's ensure we can output at least 4 bytes * (longest UTF-8 encoded codepoint): */ if (outPtr >= outBufLast) { mOut.write(outBuf, 0, outPtr); outPtr = 0; } int c = str.charAt(off++); // And then see if we have an Ascii char: if (c < 0x80) { // If so, can do a tight inner loop: outBuf[outPtr++] = (byte)c; // Let's calc how many ascii chars we can copy at most: int maxInCount = (len - off); int maxOutCount = (outBufLast - outPtr); if (maxInCount > maxOutCount) { maxInCount = maxOutCount; } maxInCount += off; ascii_loop: while (true) { if (off >= maxInCount) { // done with max. ascii seq continue output_loop; } c = str.charAt(off++); if (c >= 0x80) { break ascii_loop; } outBuf[outPtr++] = (byte) c; } } // Nope, multi-byte: if (c < 0x800) { // 2-byte outBuf[outPtr++] = (byte) (0xc0 | (c >> 6)); outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f)); } else { // 3 or 4 bytes // Surrogates? if (c < SURR1_FIRST || c > SURR2_LAST) { outBuf[outPtr++] = (byte) (0xe0 | (c >> 12)); outBuf[outPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f)); outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f)); continue; } // Yup, a surrogate: if (c > SURR1_LAST) { // must be from first range mOutPtr = outPtr; throwIllegal(c); } mSurrogate = c; // and if so, followed by another from next range if (off >= len) { // unless we hit the end? 
break; } c = _convertSurrogate(str.charAt(off++)); if (c > 0x10FFFF) { // illegal, as per RFC 3629 mOutPtr = outPtr; throwIllegal(c); } outBuf[outPtr++] = (byte) (0xf0 | (c >> 18)); outBuf[outPtr++] = (byte) (0x80 | ((c >> 12) & 0x3f)); outBuf[outPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f)); outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f)); } } mOutPtr = outPtr; } /* //////////////////////////////////////////////////////////// // Internal methods //////////////////////////////////////////////////////////// */ private final void _close(boolean forceClosing) throws IOException { byte[] buf = mOutBuffer; if (buf != null) { mOutBuffer = null; if (mOutPtr > 0) { mOut.write(buf, 0, mOutPtr); mOutPtr = 0; } if (mConfig != null) { mConfig.freeFullBBuffer(buf); } } if (forceClosing) { mOut.close(); } /* Let's 'flush' orphan surrogate, no matter what; but only * after cleanly closing everything else. */ int code = mSurrogate; if (code > 0) { mSurrogate = 0; throwIllegal(code); } } /** * Method called to calculate UTF codepoint, from a surrogate pair. */ private final int _convertSurrogate(int secondPart) throws IOException { int firstPart = mSurrogate; mSurrogate = 0; // Ok, then, is the second part valid? if (secondPart < SURR2_FIRST || secondPart > SURR2_LAST) { throw new IOException("Broken surrogate pair: first char 0x"+Integer.toHexString(firstPart)+", second 0x"+Integer.toHexString(secondPart)+"; illegal combination"); } return 0x10000 + ((firstPart - SURR1_FIRST) << 10) + (secondPart - SURR2_FIRST); } private void throwIllegal(int code) throws IOException { if (code > 0x10FFFF) { // over max? throw new IOException("Illegal character point (0x"+Integer.toHexString(code)+") to output; max is 0x10FFFF as per RFC 3629"); } if (code >= SURR1_FIRST) { if (code <= SURR1_LAST) { // Unmatched first part (closing without second part?) 
throw new IOException("Unmatched first part of surrogate pair (0x"+Integer.toHexString(code)+")"); } throw new IOException("Unmatched second part of surrogate pair (0x"+Integer.toHexString(code)+")"); } // should we ever get this? throw new IOException("Illegal character point (0x"+Integer.toHexString(code)+") to output"); } }
⏎ com/ctc/wstx/io/UTF8Writer.java
Or download all of them as a single archive file:
File name: woodstox-core-6.4.0-fyi.zip File size: 552992 bytes Release date: 2022-10-25 Download
⇒ woodstox-core-6.4.0.jar - Woodstox Core 6.4.0
⇐ What Is Woodstox XML Processing
2023-01-29, 3366👍, 0💬
Popular Posts:
Apache Log4j API provides the interface that applications should code to and provides the adapter co...
JDK 8 tools.jar is the JAR file for JDK 8 tools. It contains Java classes to support different JDK t...
iText is an ideal library for developers looking to enhance web- and other applications with dynamic...
How to display XML element type information with the jaxp\TypeInfoWriter.java provided in the Apache...
Jackson is known as "the Java JSON library", "the best JSON parser for Java", or simply "JSON for Java"....