changeset 48813:725d71b428c3

8270646: Improved scanning of XML entities Reviewed-by: mbalao, andrew
author serb
date Wed, 19 Jan 2022 01:29:03 +0000
parents 76e7ac8ee6dd
children ae8938f8c5fc
files jaxp/src/com/sun/org/apache/xerces/internal/impl/XML11DocumentScannerImpl.java jaxp/src/com/sun/org/apache/xerces/internal/impl/XML11EntityScanner.java jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLEntityScanner.java jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLScanner.java jaxp/src/com/sun/org/apache/xerces/internal/util/XMLStringBuffer.java jaxp/src/com/sun/org/apache/xerces/internal/xni/XMLString.java
diffstat 6 files changed, 209 insertions(+), 549 deletions(-) [+]
line wrap: on
line diff
--- a/jaxp/src/com/sun/org/apache/xerces/internal/impl/XML11DocumentScannerImpl.java	Wed Jan 19 01:15:09 2022 +0000
+++ b/jaxp/src/com/sun/org/apache/xerces/internal/impl/XML11DocumentScannerImpl.java	Wed Jan 19 01:29:03 2022 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
  */
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -53,7 +53,7 @@
  * @author Andy Clark, IBM
  * @author Arnaud  Le Hors, IBM
  * @author Eric Ye, IBM
- *
+ * @LastModified: Aug 2021
  */
 public class XML11DocumentScannerImpl
     extends XMLDocumentScannerImpl {
@@ -277,16 +277,6 @@
                                            + fStringBuffer.toString() + "\"");
                     }
                 }
-                // note that none of these characters should ever get through
-                // XML11EntityScanner.  Not sure why
-                // this check was originally necessary.  - NG
-                else if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
-                    fEntityScanner.scanChar(null);
-                    fStringBuffer.append(' ');
-                    if (entityDepth == fEntityDepth) {
-                        fStringBuffer2.append('\n');
-                    }
-                }
                 else if (c != -1 && XMLChar.isHighSurrogate(c)) {
                     fStringBuffer3.clear();
                     if (scanSurrogates(fStringBuffer3)) {
--- a/jaxp/src/com/sun/org/apache/xerces/internal/impl/XML11EntityScanner.java	Wed Jan 19 01:15:09 2022 +0000
+++ b/jaxp/src/com/sun/org/apache/xerces/internal/impl/XML11EntityScanner.java	Wed Jan 19 01:29:03 2022 +0000
@@ -20,6 +20,7 @@
 
 package com.sun.org.apache.xerces.internal.impl;
 
+import static com.sun.org.apache.xerces.internal.impl.Constants.XML_VERSION_1_1;
 import com.sun.org.apache.xerces.internal.impl.XMLScanner.NameType;
 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
 import com.sun.org.apache.xerces.internal.util.XML11Char;
@@ -40,7 +41,7 @@
  * @author Michael Glavassevich, IBM
  * @author Neil Graham, IBM
  *
- * @LastModified: Apr 2021
+ * @LastModified: Aug 2021
  */
 
 public class XML11EntityScanner
@@ -116,7 +117,7 @@
                 load(1, false, false);
                 offset = 0;
             }
-            if (c == '\r' && external) {
+            if (c == '\r' && external && fCurrentEntity.position < fCurrentEntity.count) {
                 int cc = fCurrentEntity.ch[fCurrentEntity.position++];
                 if (cc != '\n' && cc != 0x85) {
                     fCurrentEntity.position--;
@@ -761,71 +762,12 @@
         }
 
         // normalize newlines
-        int offset = fCurrentEntity.position;
-        int c = fCurrentEntity.ch[offset];
-        int newlines = 0;
-        boolean counted = false;
-        boolean external = fCurrentEntity.isExternal();
-        if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
-            do {
-                c = fCurrentEntity.ch[fCurrentEntity.position++];
-                if ((c == '\r' ) && external) {
-                    newlines++;
-                    fCurrentEntity.lineNumber++;
-                    fCurrentEntity.columnNumber = 1;
-                    if (fCurrentEntity.position == fCurrentEntity.count) {
-                        checkEntityLimit(null, fCurrentEntity, offset, newlines);
-                        offset = 0;
-                        fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
-                        fCurrentEntity.position = newlines;
-                        fCurrentEntity.startPosition = newlines;
-                        if (load(newlines, false, true)) {
-                            counted = true;
-                            break;
-                        }
-                    }
-                    int cc = fCurrentEntity.ch[fCurrentEntity.position];
-                    if (cc == '\n' || cc == 0x85) {
-                        fCurrentEntity.position++;
-                        offset++;
-                    }
-                    /*** NEWLINE NORMALIZATION ***/
-                    else {
-                        newlines++;
-                    }
-                }
-                else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) {
-                    newlines++;
-                    fCurrentEntity.lineNumber++;
-                    fCurrentEntity.columnNumber = 1;
-                    if (fCurrentEntity.position == fCurrentEntity.count) {
-                        checkEntityLimit(null, fCurrentEntity, offset, newlines);
-                        offset = 0;
-                        fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
-                        fCurrentEntity.position = newlines;
-                        fCurrentEntity.startPosition = newlines;
-                        if (load(newlines, false, true)) {
-                            counted = true;
-                            break;
-                        }
-                    }
-                }
-                else {
-                    fCurrentEntity.position--;
-                    break;
-                }
-            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
-            for (int i = offset; i < fCurrentEntity.position; i++) {
-                fCurrentEntity.ch[i] = '\n';
-            }
-            int length = fCurrentEntity.position - offset;
-            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
-                checkEntityLimit(null, fCurrentEntity, offset, length);
-                content.setValues(fCurrentEntity.ch, offset, length);
-                return -1;
-            }
+        if (normalizeNewlines(XML_VERSION_1_1, content, false, false, null)) {
+            return -1;
         }
 
+        int c;
+        boolean external = fCurrentEntity.isExternal();
         // inner loop, scanning for content
         if (external) {
             while (fCurrentEntity.position < fCurrentEntity.count) {
@@ -913,65 +855,12 @@
         }
 
         // normalize newlines
-        int offset = fCurrentEntity.position;
-        int c = fCurrentEntity.ch[offset];
-        int newlines = 0;
-        boolean external = fCurrentEntity.isExternal();
-        if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
-            do {
-                c = fCurrentEntity.ch[fCurrentEntity.position++];
-                if ((c == '\r' ) && external) {
-                    newlines++;
-                    fCurrentEntity.lineNumber++;
-                    fCurrentEntity.columnNumber = 1;
-                    if (fCurrentEntity.position == fCurrentEntity.count) {
-                        offset = 0;
-                        fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
-                        fCurrentEntity.position = newlines;
-                        fCurrentEntity.startPosition = newlines;
-                        if (load(newlines, false, true)) {
-                            break;
-                        }
-                    }
-                    int cc = fCurrentEntity.ch[fCurrentEntity.position];
-                    if (cc == '\n' || cc == 0x85) {
-                        fCurrentEntity.position++;
-                        offset++;
-                    }
-                    /*** NEWLINE NORMALIZATION ***/
-                    else {
-                        newlines++;
-                    }
-                }
-                else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) {
-                    newlines++;
-                    fCurrentEntity.lineNumber++;
-                    fCurrentEntity.columnNumber = 1;
-                    if (fCurrentEntity.position == fCurrentEntity.count) {
-                        offset = 0;
-                        fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
-                        fCurrentEntity.position = newlines;
-                        fCurrentEntity.startPosition = newlines;
-                        if (load(newlines, false, true)) {
-                            break;
-                        }
-                    }
-                }
-                else {
-                    fCurrentEntity.position--;
-                    break;
-                }
-            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
-            for (int i = offset; i < fCurrentEntity.position; i++) {
-                fCurrentEntity.ch[i] = '\n';
-            }
-            int length = fCurrentEntity.position - offset;
-            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
-                content.setValues(fCurrentEntity.ch, offset, length);
-                return -1;
-            }
+        if (normalizeNewlines(XML_VERSION_1_1, content, false, true, null)) {
+            return -1;
         }
 
+        int c;
+        boolean external = fCurrentEntity.isExternal();
         // scan literal value
         if (external) {
             while (fCurrentEntity.position < fCurrentEntity.count) {
@@ -1093,66 +982,11 @@
             }
 
             // normalize newlines
-            int offset = fCurrentEntity.position;
-            int c = fCurrentEntity.ch[offset];
-            int newlines = 0;
-            if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
-                do {
-                    c = fCurrentEntity.ch[fCurrentEntity.position++];
-                    if ((c == '\r' ) && external) {
-                        newlines++;
-                        fCurrentEntity.lineNumber++;
-                        fCurrentEntity.columnNumber = 1;
-                        if (fCurrentEntity.position == fCurrentEntity.count) {
-                            offset = 0;
-                            fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
-                            fCurrentEntity.position = newlines;
-                            fCurrentEntity.startPosition = newlines;
-                            if (load(newlines, false, true)) {
-                                break;
-                            }
-                        }
-                        int cc = fCurrentEntity.ch[fCurrentEntity.position];
-                        if (cc == '\n' || cc == 0x85) {
-                            fCurrentEntity.position++;
-                            offset++;
-                        }
-                        /*** NEWLINE NORMALIZATION ***/
-                        else {
-                            newlines++;
-                        }
-                    }
-                    else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) {
-                        newlines++;
-                        fCurrentEntity.lineNumber++;
-                        fCurrentEntity.columnNumber = 1;
-                        if (fCurrentEntity.position == fCurrentEntity.count) {
-                            offset = 0;
-                            fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
-                            fCurrentEntity.position = newlines;
-                            fCurrentEntity.startPosition = newlines;
-                            fCurrentEntity.count = newlines;
-                            if (load(newlines, false, true)) {
-                                break;
-                            }
-                        }
-                    }
-                    else {
-                        fCurrentEntity.position--;
-                        break;
-                    }
-                } while (fCurrentEntity.position < fCurrentEntity.count - 1);
-                for (int i = offset; i < fCurrentEntity.position; i++) {
-                    fCurrentEntity.ch[i] = '\n';
-                }
-                int length = fCurrentEntity.position - offset;
-                if (fCurrentEntity.position == fCurrentEntity.count - 1) {
-                    checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
-                    buffer.append(fCurrentEntity.ch, offset, length);
-                    return true;
-                }
+            if (normalizeNewlines(XML_VERSION_1_1, buffer, true, false, NameType.COMMENT)) {
+                return true;
             }
 
+            int c;
             // iterate over buffer looking for delimiter
             if (external) {
                 OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
@@ -1286,22 +1120,6 @@
             checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
             return true;
         }
-        else if (c == '\n' && (cc == '\r' ) && fCurrentEntity.isExternal()) {
-            // handle newlines
-            if (fCurrentEntity.position == fCurrentEntity.count) {
-                invokeListeners(1);
-                fCurrentEntity.ch[0] = (char)cc;
-                load(1, false, false);
-            }
-            int ccc = fCurrentEntity.ch[++fCurrentEntity.position];
-            if (ccc == '\n' || ccc == 0x85) {
-                fCurrentEntity.position++;
-            }
-            fCurrentEntity.lineNumber++;
-            fCurrentEntity.columnNumber = 1;
-            checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
-            return true;
-        }
 
         // character was not skipped
         return false;
--- a/jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLEntityScanner.java	Wed Jan 19 01:15:09 2022 +0000
+++ b/jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLEntityScanner.java	Wed Jan 19 01:29:03 2022 +0000
@@ -21,6 +21,8 @@
 package com.sun.org.apache.xerces.internal.impl;
 
 
+import static com.sun.org.apache.xerces.internal.impl.Constants.XML_VERSION_1_0;
+import static com.sun.org.apache.xerces.internal.impl.Constants.XML_VERSION_1_1;
 import com.sun.org.apache.xerces.internal.impl.XMLScanner.NameType;
 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
 import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
@@ -55,7 +57,7 @@
  * @author Arnaud  Le Hors, IBM
  * @author K.Venugopal Sun Microsystems
  *
- * @LastModified: Apr 2021
+ * @LastModified: Aug 2021
  */
 public class XMLEntityScanner implements XMLLocator  {
 
@@ -149,6 +151,15 @@
     // indicates that the operation is for detecting XML version
     boolean detectingVersion = false;
 
+    // offset of the current cursor position
+    int offset = 0;
+
+    // number of newlines in the current process
+    int newlines = 0;
+
+    // indicating whether the content has been counted towards limit
+    boolean counted = false;
+
     //
     // Constructors
     //
@@ -553,7 +564,7 @@
         }
 
         // scan character
-        int offset = fCurrentEntity.position;
+        offset = fCurrentEntity.position;
         int c = fCurrentEntity.ch[fCurrentEntity.position++];
         if (c == '\n' || (c == '\r' && isExternal)) {
             fCurrentEntity.lineNumber++;
@@ -561,10 +572,10 @@
             if (fCurrentEntity.position == fCurrentEntity.count) {
                 invokeListeners(1);
                 fCurrentEntity.ch[0] = (char)c;
-                load(1, false, false);
+                load(1, true, false);
                 offset = 0;
             }
-            if (c == '\r' && isExternal) {
+            if (c == '\r' && isExternal && fCurrentEntity.position < fCurrentEntity.count) {
                 if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
                     fCurrentEntity.position--;
                 }
@@ -614,7 +625,7 @@
         }
 
         // scan nmtoken
-        int offset = fCurrentEntity.position;
+        offset = fCurrentEntity.position;
         boolean vc = false;
         char c;
         while (true){
@@ -695,7 +706,7 @@
         }
 
         // scan name
-        int offset = fCurrentEntity.position;
+        offset = fCurrentEntity.position;
         int length;
         if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
             if (++fCurrentEntity.position == fCurrentEntity.count) {
@@ -788,7 +799,7 @@
         }
 
         // scan qualified name
-        int offset = fCurrentEntity.position;
+        offset = fCurrentEntity.position;
 
         //making a check if if the specified character is a valid name start character
         //as defined by production [5] in the XML 1.0 specification.
@@ -1043,81 +1054,11 @@
         }
 
         // normalize newlines
-        int offset = fCurrentEntity.position;
-        int c = fCurrentEntity.ch[offset];
-        int newlines = 0;
-        boolean counted = false;
-        if (c == '\n' || (c == '\r' && isExternal)) {
-            if (DEBUG_BUFFER) {
-                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
-                print();
-                System.out.println();
-            }
-            do {
-                c = fCurrentEntity.ch[fCurrentEntity.position++];
-                if (c == '\r' && isExternal) {
-                    newlines++;
-                    fCurrentEntity.lineNumber++;
-                    fCurrentEntity.columnNumber = 1;
-                    if (fCurrentEntity.position == fCurrentEntity.count) {
-                        checkEntityLimit(null, fCurrentEntity, offset, newlines);
-                        offset = 0;
-                        fCurrentEntity.position = newlines;
-                        if (load(newlines, false, true)) {
-                            counted = true;
-                            break;
-                        }
-                    }
-                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
-                        fCurrentEntity.position++;
-                        offset++;
-                    }
-                    /*** NEWLINE NORMALIZATION ***/
-                    else {
-                        newlines++;
-                    }
-                } else if (c == '\n') {
-                    newlines++;
-                    fCurrentEntity.lineNumber++;
-                    fCurrentEntity.columnNumber = 1;
-                    if (fCurrentEntity.position == fCurrentEntity.count) {
-                        checkEntityLimit(null, fCurrentEntity, offset, newlines);
-                        offset = 0;
-                        fCurrentEntity.position = newlines;
-                        if (load(newlines, false, true)) {
-                            counted = true;
-                            break;
-                        }
-                    }
-                } else {
-                    fCurrentEntity.position--;
-                    break;
-                }
-            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
-            for (int i = offset; i < fCurrentEntity.position; i++) {
-                fCurrentEntity.ch[i] = '\n';
-            }
-            int length = fCurrentEntity.position - offset;
-            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
-                checkEntityLimit(null, fCurrentEntity, offset, length);
-                //CHANGED: dont replace the value.. append to the buffer. This gives control to the callee
-                //on buffering the data..
-                content.setValues(fCurrentEntity.ch, offset, length);
-                //content.append(fCurrentEntity.ch, offset, length);
-                if (DEBUG_BUFFER) {
-                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
-                    print();
-                    System.out.println();
-                }
-                return -1;
-            }
-            if (DEBUG_BUFFER) {
-                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
-                print();
-                System.out.println();
-            }
+        if (normalizeNewlines(XML_VERSION_1_0, content, false, false, null)) {
+            return -1;
         }
 
+        int c;
         while (fCurrentEntity.position < fCurrentEntity.count) {
             c = fCurrentEntity.ch[fCurrentEntity.position++];
             if (!XMLChar.isContent(c)) {
@@ -1202,85 +1143,14 @@
         }
 
         // normalize newlines
-        int offset = fCurrentEntity.position;
-        int c = fCurrentEntity.ch[offset];
-        int newlines = 0;
         if(whiteSpaceInfoNeeded)
             whiteSpaceLen=0;
-        if (c == '\n' || (c == '\r' && isExternal)) {
-            if (DEBUG_BUFFER) {
-                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
-                print();
-                System.out.println();
-            }
-            do {
-                c = fCurrentEntity.ch[fCurrentEntity.position++];
-                if (c == '\r' && isExternal) {
-                    newlines++;
-                    fCurrentEntity.lineNumber++;
-                    fCurrentEntity.columnNumber = 1;
-                    if (fCurrentEntity.position == fCurrentEntity.count) {
-                        offset = 0;
-                        fCurrentEntity.position = newlines;
-                        if (load(newlines, false, true)) {
-                            break;
-                        }
-                    }
-                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
-                        fCurrentEntity.position++;
-                        offset++;
-                    }
-                    /*** NEWLINE NORMALIZATION ***/
-                    else {
-                        newlines++;
-                    }
-                    /***/
-                } else if (c == '\n') {
-                    newlines++;
-                    fCurrentEntity.lineNumber++;
-                    fCurrentEntity.columnNumber = 1;
-                    if (fCurrentEntity.position == fCurrentEntity.count) {
-                        offset = 0;
-                        fCurrentEntity.position = newlines;
-                        if (load(newlines, false, true)) {
-                            break;
-                        }
-                    }
-                    /*** NEWLINE NORMALIZATION ***
-                     * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
-                     * && external) {
-                     * fCurrentEntity.position++;
-                     * offset++;
-                     * }
-                     * /***/
-                } else {
-                    fCurrentEntity.position--;
-                    break;
-                }
-            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
-            int i=0;
-            for ( i = offset; i < fCurrentEntity.position; i++) {
-                fCurrentEntity.ch[i] = '\n';
-                storeWhiteSpace(i);
-            }
 
-            int length = fCurrentEntity.position - offset;
-            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
-                content.setValues(fCurrentEntity.ch, offset, length);
-                if (DEBUG_BUFFER) {
-                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
-                    print();
-                    System.out.println();
-                }
-                return -1;
-            }
-            if (DEBUG_BUFFER) {
-                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
-                print();
-                System.out.println();
-            }
+        if (normalizeNewlines(XML_VERSION_1_0, content, false, true, null)) {
+            return -1;
         }
 
+        int c;
         // scan literal value
         for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) {
             c = fCurrentEntity.ch[fCurrentEntity.position];
@@ -1331,7 +1201,7 @@
      *
      * @param whiteSpacePos position of a whitespace in the scanner entity buffer
      */
-    private void storeWhiteSpace(int whiteSpacePos) {
+    void storeWhiteSpace(int whiteSpacePos) {
         if (whiteSpaceLen >= whiteSpaceLookup.length) {
             int [] tmp = new int[whiteSpaceLookup.length + 100];
             System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length);
@@ -1413,75 +1283,11 @@
                 return false;
             }
 
-            // normalize newlines
-            int offset = fCurrentEntity.position;
-            int c = fCurrentEntity.ch[offset];
-            int newlines = 0;
-            if (c == '\n' || (c == '\r' && isExternal)) {
-                if (DEBUG_BUFFER) {
-                    System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
-                    print();
-                    System.out.println();
-                }
-                do {
-                    c = fCurrentEntity.ch[fCurrentEntity.position++];
-                    if (c == '\r' && isExternal) {
-                        newlines++;
-                        fCurrentEntity.lineNumber++;
-                        fCurrentEntity.columnNumber = 1;
-                        if (fCurrentEntity.position == fCurrentEntity.count) {
-                            offset = 0;
-                            fCurrentEntity.position = newlines;
-                            if (load(newlines, false, true)) {
-                                break;
-                            }
-                        }
-                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
-                            fCurrentEntity.position++;
-                            offset++;
-                        }
-                        /*** NEWLINE NORMALIZATION ***/
-                        else {
-                            newlines++;
-                        }
-                    } else if (c == '\n') {
-                        newlines++;
-                        fCurrentEntity.lineNumber++;
-                        fCurrentEntity.columnNumber = 1;
-                        if (fCurrentEntity.position == fCurrentEntity.count) {
-                            offset = 0;
-                            fCurrentEntity.position = newlines;
-                            fCurrentEntity.count = newlines;
-                            if (load(newlines, false, true)) {
-                                break;
-                            }
-                        }
-                    } else {
-                        fCurrentEntity.position--;
-                        break;
-                    }
-                } while (fCurrentEntity.position < fCurrentEntity.count - 1);
-                for (int i = offset; i < fCurrentEntity.position; i++) {
-                    fCurrentEntity.ch[i] = '\n';
-                }
-                int length = fCurrentEntity.position - offset;
-                if (fCurrentEntity.position == fCurrentEntity.count - 1) {
-                    checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
-                    buffer.append(fCurrentEntity.ch, offset, length);
-                    if (DEBUG_BUFFER) {
-                        System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
-                        print();
-                        System.out.println();
-                    }
-                    return true;
-                }
-                if (DEBUG_BUFFER) {
-                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
-                    print();
-                    System.out.println();
-                }
+            if (normalizeNewlines(XML_VERSION_1_0, buffer, true, false, NameType.COMMENT)) {
+                return true;
             }
 
+            int c;
             // iterate over buffer looking for delimiter
             OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
                 c = fCurrentEntity.ch[fCurrentEntity.position++];
@@ -1561,7 +1367,7 @@
         }
 
         // skip character
-        int offset = fCurrentEntity.position;
+        offset = fCurrentEntity.position;
         int cc = fCurrentEntity.ch[fCurrentEntity.position];
         if (cc == c) {
             fCurrentEntity.position++;
@@ -1578,26 +1384,6 @@
             }
             checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
             return true;
-        } else if (c == '\n' && cc == '\r' && isExternal) {
-            // handle newlines
-            if (fCurrentEntity.position == fCurrentEntity.count) {
-                invokeListeners(1);
-                fCurrentEntity.ch[0] = (char)cc;
-                load(1, false, false);
-            }
-            fCurrentEntity.position++;
-            if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
-                fCurrentEntity.position++;
-            }
-            fCurrentEntity.lineNumber++;
-            fCurrentEntity.columnNumber = 1;
-            if (DEBUG_BUFFER) {
-                System.out.print(")skipChar, '"+(char)c+"': ");
-                print();
-                System.out.println(" -> true");
-            }
-            checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
-            return true;
         }
 
         // character was not skipped
@@ -1650,7 +1436,7 @@
 
         // skip spaces
         int c = fCurrentEntity.ch[fCurrentEntity.position];
-        int offset = fCurrentEntity.position - 1;
+        offset = fCurrentEntity.position - 1;
         if (XMLChar.isSpace(c)) {
             do {
                 boolean entityChanged = false;
@@ -2323,5 +2109,86 @@
 
     } // skipDeclSpaces():boolean
 
+    /**
+     * Normalizes newlines. As specified in XML specification, this method
+     * converts newlines, '\n', '\r' and '\r\n' to '\n' as 2.11 End-of-Line Handling.
+     * Further, it may put them in a cache for later process as needed, for example
+     * as specified in 3.3.3 Attribute-Value Normalization.
+     *
+     * @ImplNote this method does not limit to processing external parsed entities
+     * as 2.11 required. It handles all cases where newlines need to be processed.
+     *
+     * @param buffer the current content buffer
+     * @param append a flag indicating whether to append to the buffer
+     * @param storeWS a flag indicating whether the whitespaces need to be stored
+     * for later processing
+     * @param nt the type of the entity
+     * @return true if the cursor is at the end of the current entity, false otherwise.
+     * @throws IOException
+     */
+    protected boolean normalizeNewlines(short version, XMLString buffer, boolean append,
+            boolean storeWS, NameType nt)
+            throws IOException {
+        // normalize newlines
+        offset = fCurrentEntity.position;
+        int c = fCurrentEntity.ch[offset];
+        newlines = 0;
+        // how this information is used is determined by the caller of this method
+        counted = false;
+        if ((c == '\n' || c == '\r') ||
+                (version == XML_VERSION_1_1 && (c == 0x85 || c == 0x2028))) {
+            do {
+                c = fCurrentEntity.ch[fCurrentEntity.position++];
+                if ((c == '\n' || c == '\r') ||
+                    (version == XML_VERSION_1_1 && (c == 0x85 || c == 0x2028))) {
+                    newlines++;
+                    fCurrentEntity.lineNumber++;
+                    fCurrentEntity.columnNumber = 1;
+                    if (fCurrentEntity.position == fCurrentEntity.count) {
+                        checkEntityLimit(nt, fCurrentEntity, offset, newlines);
+                        offset = 0;
+                        fCurrentEntity.position = newlines;
+                        if (load(newlines, false, true)) {
+                            counted = true;
+                            break;
+                        }
+                    }
+                    if (c == '\r') {
+                        int cc = fCurrentEntity.ch[fCurrentEntity.position];
+                        if (cc == '\n' || (version == XML_VERSION_1_1 && cc == 0x85)) {
+                            fCurrentEntity.position++;
+                            offset++;
+                        }
+                        /*** NEWLINE NORMALIZATION ***/
+                        else {
+                            newlines++;
+                        }
+                    }
+                } else {
+                    fCurrentEntity.position--;
+                    break;
+                }
+            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
 
+            for (int i = offset; i < fCurrentEntity.position; i++) {
+                fCurrentEntity.ch[i] = '\n';
+                if (storeWS) {
+                    storeWhiteSpace(i);
+                }
+            }
+
+            int length = fCurrentEntity.position - offset;
+            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
+                checkEntityLimit(nt, fCurrentEntity, offset, length);
+                if (append) {
+                    buffer.append(fCurrentEntity.ch, offset, length);
+                } else {
+                    buffer.setValues(fCurrentEntity.ch, offset, length);
+                }
+
+                return true;
+            }
+        }
+        return false;
+    }
 } // class XMLEntityScanner
--- a/jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLScanner.java	Wed Jan 19 01:15:09 2022 +0000
+++ b/jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLScanner.java	Wed Jan 19 01:29:03 2022 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -68,7 +68,7 @@
  * @author Eric Ye, IBM
  * @author K.Venugopal SUN Microsystems
  * @author Sunitha Reddy, SUN Microsystems
- * @LastModified: Feb 2020
+ * @LastModified: Aug 2021
  */
 public abstract class XMLScanner
         implements XMLComponent {
@@ -957,12 +957,6 @@
                         System.out.println("** valueF: \""
                                 + stringBuffer.toString() + "\"");
                     }
-                } else if (c == '\n' || c == '\r') {
-                    fEntityScanner.scanChar(null);
-                    stringBuffer.append(' ');
-                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
-                        fStringBuffer2.append('\n');
-                    }
                 } else if (c != -1 && XMLChar.isHighSurrogate(c)) {
                     fStringBuffer3.clear();
                     if (scanSurrogates(fStringBuffer3)) {
--- a/jaxp/src/com/sun/org/apache/xerces/internal/util/XMLStringBuffer.java	Wed Jan 19 01:15:09 2022 +0000
+++ b/jaxp/src/com/sun/org/apache/xerces/internal/util/XMLStringBuffer.java	Wed Jan 19 01:29:03 2022 +0000
@@ -1,6 +1,5 @@
 /*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
  */
 /*
  * The Apache Software License, Version 1.1
@@ -81,20 +80,12 @@
  *
  * @author Andy Clark, IBM
  * @author Eric Ye, IBM
- *
+ * @LastModified: Aug 2021
  */
 public class XMLStringBuffer
 extends XMLString {
 
     //
-    // Constants
-    //
-
-
-    /** Default buffer size (32). */
-    public static final int DEFAULT_SIZE = 32;
-
-    //
     // Data
     //
 
@@ -152,79 +143,4 @@
         length = 0;
     }
 
-    /**
-     * append
-     *
-     * @param c
-     */
-    public void append(char c) {
-        if(this.length + 1 > this.ch.length){
-            int newLength = this.ch.length * 2 ;
-            if(newLength < this.ch.length + DEFAULT_SIZE){
-                newLength = this.ch.length + DEFAULT_SIZE;
-            }
-            char [] tmp = new char[newLength];
-            System.arraycopy(this.ch, 0, tmp, 0, this.length);
-            this.ch = tmp;
-        }
-        this.ch[this.length] = c ;
-        this.length++;
-    } // append(char)
-
-    /**
-     * append
-     *
-     * @param s
-     */
-    public void append(String s) {
-        int length = s.length();
-        if (this.length + length > this.ch.length) {
-            int newLength = this.ch.length * 2 ;
-            if(newLength < this.ch.length + length + DEFAULT_SIZE){
-                newLength = this.ch.length + length+ DEFAULT_SIZE;
-            }
-
-            char[] newch = new char[newLength];
-            System.arraycopy(this.ch, 0, newch, 0, this.length);
-            this.ch = newch;
-        }
-        s.getChars(0, length, this.ch, this.length);
-        this.length += length;
-    } // append(String)
-
-    /**
-     * append
-     *
-     * @param ch
-     * @param offset
-     * @param length
-     */
-    public void append(char[] ch, int offset, int length) {
-        if (this.length + length > this.ch.length) {
-            int newLength = this.ch.length * 2 ;
-            if(newLength < this.ch.length + length + DEFAULT_SIZE){
-                newLength = this.ch.length + length + DEFAULT_SIZE;
-            }
-            char[] newch = new char[newLength];
-            System.arraycopy(this.ch, 0, newch, 0, this.length);
-            this.ch = newch;
-        }
-        //making the code more robust as it would handle null or 0 length data,
-        //add the data only when it contains some thing
-        if(ch != null && length > 0){
-            System.arraycopy(ch, offset, this.ch, this.length, length);
-            this.length += length;
-        }
-    } // append(char[],int,int)
-
-    /**
-     * append
-     *
-     * @param s
-     */
-    public void append(XMLString s) {
-        append(s.ch, s.offset, s.length);
-    } // append(XMLString)
-
-
 } // class XMLStringBuffer
--- a/jaxp/src/com/sun/org/apache/xerces/internal/xni/XMLString.java	Wed Jan 19 01:15:09 2022 +0000
+++ b/jaxp/src/com/sun/org/apache/xerces/internal/xni/XMLString.java	Wed Jan 19 01:29:03 2022 +0000
@@ -1,6 +1,5 @@
 /*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
  */
 /*
  * Copyright 2000-2002,2004 The Apache Software Foundation.
@@ -41,9 +40,11 @@
  *
  * @author Eric Ye, IBM
  * @author Andy Clark, IBM
- *
+ * @LastModified: Aug 2021
  */
 public class XMLString {
+    /** Default buffer size (32). */
+    public static final int DEFAULT_SIZE = 32;
 
     //
     // Data
@@ -188,4 +189,78 @@
         return length > 0 ? new String(ch, offset, length) : "";
     } // toString():String
 
+    /**
+     * Appends a char to the buffer.
+     *
+     * @param c the char
+     */
+    public void append(char c) {
+        if(this.length + 1 > this.ch.length){
+            int newLength = this.ch.length * 2 ;
+            if(newLength < this.ch.length + DEFAULT_SIZE){
+                newLength = this.ch.length + DEFAULT_SIZE;
+            }
+            char [] tmp = new char[newLength];
+            System.arraycopy(this.ch, 0, tmp, 0, this.length);
+            this.ch = tmp;
+        }
+        this.ch[this.length] = c ;
+        this.length++;
+    } // append(char)
+
+    /**
+     * Appends a string to the buffer.
+     *
+     * @param s the string
+     */
+    public void append(String s) {
+        int length = s.length();
+        if (this.length + length > this.ch.length) {
+            int newLength = this.ch.length * 2 ;
+            if(newLength < this.ch.length + length + DEFAULT_SIZE){
+                newLength = this.ch.length + length+ DEFAULT_SIZE;
+            }
+
+            char[] newch = new char[newLength];
+            System.arraycopy(this.ch, 0, newch, 0, this.length);
+            this.ch = newch;
+        }
+        s.getChars(0, length, this.ch, this.length);
+        this.length += length;
+    } // append(String)
+
+    /**
+     * Appends a number of characters to the buffer.
+     *
+     * @param ch the char array
+     * @param offset the offset
+     * @param length the length
+     */
+    public void append(char[] ch, int offset, int length) {
+        if (this.length + length > this.ch.length) {
+            int newLength = this.ch.length * 2 ;
+            if(newLength < this.ch.length + length + DEFAULT_SIZE){
+                newLength = this.ch.length + length + DEFAULT_SIZE;
+            }
+            char[] newch = new char[newLength];
+            System.arraycopy(this.ch, 0, newch, 0, this.length);
+            this.ch = newch;
+        }
+        //making the code more robust as it would handle null or 0 length data,
+        //add the data only when it contains some thing
+        if(ch != null && length > 0){
+            System.arraycopy(ch, offset, this.ch, this.length, length);
+            this.length += length;
+        }
+    } // append(char[],int,int)
+
+    /**
+     * Appends another buffer to this buffer
+     *
+     * @param s another buffer
+     */
+    public void append(XMLString s) {
+        append(s.ch, s.offset, s.length);
+    } // append(XMLString)
+
 } // class XMLString