Categories:
Audio (13)
Biotech (29)
Bytecode (36)
Database (77)
Framework (7)
Game (7)
General (507)
Graphics (53)
I/O (35)
IDE (2)
JAR Tools (101)
JavaBeans (21)
JDBC (121)
JDK (426)
JSP (20)
Logging (108)
Mail (58)
Messaging (8)
Network (84)
PDF (97)
Report (7)
Scripting (84)
Security (32)
Server (121)
Servlet (26)
SOAP (24)
Testing (54)
Web (15)
XML (309)
Collections:
Other Resources:
Woodstox 6.4.0 - Source Code Files
Woodstox 6.4.0 source code files are available on the Woodstox GitHub website.
You can download them from the "src/main/java" folder.
You can also browse Woodstox Source Code files below:
✍: FYIcenter
⏎ com/ctc/wstx/io/CharsetNames.java
package com.ctc.wstx.io; import java.io.OutputStreamWriter; import java.io.Writer; import com.ctc.wstx.util.StringUtil; /** * Simple utility class that normalizes given character input character * set names into canonical (within Woodstox, anyways) names. */ public final class CharsetNames { /* ////////////////////////////////////////////////// // Canonical names used internally ////////////////////////////////////////////////// */ // // // Unicode variants: public final static String CS_US_ASCII = "US-ASCII"; public final static String CS_UTF8 = "UTF-8"; /** * This constants is intentionally vague, so that some other information * will be needed to determine the endianness. */ public final static String CS_UTF16 = "UTF-16"; public final static String CS_UTF16BE = "UTF-16BE"; public final static String CS_UTF16LE = "UTF-16LE"; public final static String CS_UTF32 = "UTF-32"; public final static String CS_UTF32BE = "UTF-32BE"; public final static String CS_UTF32LE = "UTF-32LE"; // // // 8-bit ISO encodings: public final static String CS_ISO_LATIN1 = "ISO-8859-1"; // // // Japanese non-unicode encodings: public final static String CS_SHIFT_JIS = "Shift_JIS"; // // // Other oddities: /* There are tons of EBCDIC varieties, with similar but * non-identical names. As a result, we can not give or use * just a single canonical name for general use. * However, we can choose a single one to use for bootstrapping; * that is, for parsing xml declaration to know the "real" * EBCDIC variant. */ public final static String CS_EBCDIC_SUBSET = "IBM037"; /* ////////////////////////////////////////////////// // Utility methods ////////////////////////////////////////////////// */ public static String normalize(String csName) { if (csName == null || csName.length() < 3) { return csName; } /* Canonical charset names here are from IANA recommendation: * http://www.iana.org/assignments/character-sets * but comparison is done loosely (case-insensitive, ignoring * spacing, underscore vs. 
hyphen etc) to try to make detection * as extensive as possible. */ /* But first bit of pre-filtering: it seems like 'cs' prefix * is applicable to pretty much all actual encodings (as per * IANA recommendations; csASCII, csUcs4 etc). So, let's just * strip out the prefix if so */ boolean gotCsPrefix = false; char c = csName.charAt(0); if (c == 'c' || c == 'C'){ char d = csName.charAt(1); if (d == 's' || d == 'S') { csName = csName.substring(2); c = csName.charAt(0); gotCsPrefix = true; } } switch (c) { case 'a': case 'A': if (StringUtil.equalEncodings(csName, "ASCII")) { return CS_US_ASCII; } break; case 'c': case 'C': /* Tons of variants: let's assume 'cpXXX' is an EBCDIC * variant, and should read 'IBMXXX' */ if (StringUtil.encodingStartsWith(csName, "cp")) { return "IBM" + StringUtil.trimEncoding(csName, true).substring(2); } // Hmmh. There are boatloads of these... but what to do with them? if (StringUtil.encodingStartsWith(csName, "cs")) { // Well, "csIBMxx" means EBCDIC of "IBMxx" if (StringUtil.encodingStartsWith(csName, "csIBM")) { // So let's just peel off "cs" prefix: return StringUtil.trimEncoding(csName, true).substring(2); } // !!! TBI } break; case 'e': case 'E': if (csName.startsWith("EBCDIC-CP-") || csName.startsWith("ebcdic-cp-")) { // EBCDIC, but which variety? 
// Let's trim out prefix to make comparison easier: String type = StringUtil.trimEncoding(csName, true).substring(8); // Note: these are suggested encodings of Xerces if (type.equals("US") || type.equals("CA") || type.equals("WT") || type.equals("NL")) { return "IBM037"; } if (type.equals("DK") || type.equals("NO")) { // Denmark, Norway return "IBM277"; } if (type.equals("FI") || type.equals("SE")) { // Finland, Sweden return "IBM278"; } if (type.equals("ROECE") || type.equals("YU")) { return "IBM870"; } if (type.equals("IT")) return "IBM280"; if (type.equals("ES")) return "IBM284"; if (type.equals("GB")) return "IBM285"; if (type.equals("FR")) return "IBM297"; if (type.equals("AR1")) return "IBM420"; if (type.equals("AR2")) return "IBM918"; if (type.equals("HE")) return "IBM424"; if (type.equals("CH")) return "IBM500"; if (type.equals("IS")) return "IBM871"; // Dunno... let's just default to 037? return CS_EBCDIC_SUBSET; } break; case 'i': case 'I': if (StringUtil.equalEncodings(csName, "ISO-8859-1") || StringUtil.equalEncodings(csName, "ISO-Latin1")) { return CS_ISO_LATIN1; } if (StringUtil.encodingStartsWith(csName, "ISO-10646")) { /* Hmmh. There are boatloads of alternatives here, it * seems (see http://www.iana.org/assignments/character-sets * for details) */ int ix = csName.indexOf("10646"); String suffix = csName.substring(ix+5); if (StringUtil.equalEncodings(suffix, "UCS-Basic")) { return CS_US_ASCII; } if (StringUtil.equalEncodings(suffix, "Unicode-Latin1")) { return CS_ISO_LATIN1; } if (StringUtil.equalEncodings(suffix, "UCS-2")) { return CS_UTF16; // endianness? } if (StringUtil.equalEncodings(suffix, "UCS-4")) { return CS_UTF32; // endianness? } if (StringUtil.equalEncodings(suffix, "UTF-1")) { // "Universal Transfer Format (1), this is the multibyte encoding, that subsets ASCII-7"??? return CS_US_ASCII; } if (StringUtil.equalEncodings(suffix, "J-1")) { // Name: ISO-10646-J-1, Source: ISO 10646 Japanese, see RFC 1815. // ... 
so what does that really mean? let's consider it ascii return CS_US_ASCII; } if (StringUtil.equalEncodings(suffix, "US-ASCII")) { return CS_US_ASCII; } } else if (StringUtil.encodingStartsWith(csName, "IBM")) { // EBCDIC of some kind... what (if anything) to do? // ... for now, return as is return csName; } break; case 'j': case 'J': if (StringUtil.equalEncodings(csName, "JIS_Encoding")) { return CS_SHIFT_JIS; } break; case 's': case 'S': if (StringUtil.equalEncodings(csName, "Shift_JIS")) { return CS_SHIFT_JIS; } break; case 'u': case 'U': if (csName.length() < 2) { // sanity check break; } switch (csName.charAt(1)) { case 'c': case 'C': if (StringUtil.equalEncodings(csName, "UCS-2")) { return CS_UTF16; } if (StringUtil.equalEncodings(csName, "UCS-4")) { return CS_UTF32; } break; case 'n': // csUnicodeXxx, case 'N': if (gotCsPrefix) { if (StringUtil.equalEncodings(csName, "Unicode")) { return CS_UTF16; // need BOM } if (StringUtil.equalEncodings(csName, "UnicodeAscii")) { return CS_ISO_LATIN1; } if (StringUtil.equalEncodings(csName, "UnicodeAscii")) { return CS_US_ASCII; } } break; case 's': case 'S': if (StringUtil.equalEncodings(csName, "US-ASCII")) { return CS_US_ASCII; } break; case 't': case 'T': if (StringUtil.equalEncodings(csName, "UTF-8")) { return CS_UTF8; } if (StringUtil.equalEncodings(csName, "UTF-16BE")) { return CS_UTF16BE; } if (StringUtil.equalEncodings(csName, "UTF-16LE")) { return CS_UTF16LE; } if (StringUtil.equalEncodings(csName, "UTF-16")) { return CS_UTF16; } if (StringUtil.equalEncodings(csName, "UTF-32BE")) { return CS_UTF32BE; } if (StringUtil.equalEncodings(csName, "UTF-32LE")) { return CS_UTF32LE; } if (StringUtil.equalEncodings(csName, "UTF-32")) { return CS_UTF32; } if (StringUtil.equalEncodings(csName, "UTF")) { // 21-Jan-2006, TSa: ??? What is this to do... ? 
return CS_UTF16; } } break; } return csName; } /** * Because of legacy encodings used by earlier JDK versions, we * need to be careful when accessing encoding names via JDK * classes. */ public static String findEncodingFor(Writer w) { if (w instanceof OutputStreamWriter) { String enc = ((OutputStreamWriter) w).getEncoding(); /* [WSTX-146]: It is important that we normalize this, since * older JDKs return legacy encoding names ("UTF8" instead of * canonical "UTF-8") */ return normalize(enc); } return null; } }
⏎ com/ctc/wstx/io/CharsetNames.java
Or download all of them as a single archive file:
File name: woodstox-core-6.4.0-fyi.zip File size: 552992 bytes Release date: 2022-10-25 Download
⇒ woodstox-core-6.4.0.jar - Woodstox Core 6.4.0
⇐ What Is Woodstox XML Processing
2023-01-29, 3377👍, 0💬
Popular Posts:
What Is jtds-1.2.2.jar? jtds-1.2.2.jar is the JAR file of jTDS Java library 1.2.2, which is a JDBC ...
jlGui is a music player for the Java platform. It is based on Java Sound 1.0 (i.e. JDK 1.3+). It sup...
JDK 11 java.xml.jmod is the JMOD file for JDK 11 XML (eXtensible Markup Language) module. JDK 11 XML...
JDK 11 java.xml.jmod is the JMOD file for JDK 11 XML (eXtensible Markup Language) module. JDK 11 XML...
How to download and install JDK (Java Development Kit) 1.3? If you want to write Java applications, ...