/**
 * Returns the character count of {@code rawHtmlContent} by delegating to the static
 * {@code charCount(CharSequence)} overload that follows.
 *
 * NOTE(review): this line looks like a side-by-side diff (old version | new version)
 * collapsed onto a single line, with the diff viewer's line numbers inlined between
 * tokens; confirm before treating it as compilable Java.
 *
 * What the visible part of the static overload shows: it walks the input one char at a
 * time, switching on a State value. In TEXT, '<' moves to TAG without counting, '&'
 * moves to ENTITY and is counted once, and any other char is counted. In ENTITY, the
 * first char that is not a letter or digit moves back to TEXT. In TAG, '"' moves to
 * STRING and '>' moves back to TEXT. The rest of the method (including the STRING case
 * and the return) lies beyond this excerpt.
 *
 * The right-hand version differs in one visible way: in TEXT, '\r' and '\n' are skipped
 * rather than counted.
 */
95 @Override 96 public int charCount() { 97 return charCount(rawHtmlContent); 98 } 99 100 static int charCount(CharSequence htmlText) { 101 State state = State.TEXT; 102 int count = 0; 103 for (int i = 0; i < htmlText.length(); i++) { 104 char c = htmlText.charAt(i); 105 switch (state) { 106 case TEXT: 107 switch (c) { 108 case '<': 109 state = State.TAG; 110 break; 111 case '&': 112 state = State.ENTITY; 113 count++; 114 break; 115 default: 116 count++; 117 } 118 break; 119 120 case ENTITY: 121 if (!Character.isLetterOrDigit(c)) 122 state = State.TEXT; 123 break; 124 125 case TAG: 126 switch (c) { 127 case '"': 128 state = State.STRING; 129 break; 130 case '>': 131 state = State.TEXT; 132 break; 133 } 134 break; | 95 @Override 96 public int charCount() { 97 return charCount(rawHtmlContent); 98 } 99 100 static int charCount(CharSequence htmlText) { 101 State state = State.TEXT; 102 int count = 0; 103 for (int i = 0; i < htmlText.length(); i++) { 104 char c = htmlText.charAt(i); 105 switch (state) { 106 case TEXT: 107 switch (c) { 108 case '<': 109 state = State.TAG; 110 break; 111 case '&': 112 state = State.ENTITY; 113 count++; 114 break; 115 case '\r': 116 case '\n': 117 // Windows uses "\r\n" as line separator while UNIX uses "\n". 118 // Ignore line separators to get consistent results across platforms. 119 break; 120 default: 121 count++; 122 } 123 break; 124 125 case ENTITY: 126 if (!Character.isLetterOrDigit(c)) 127 state = State.TEXT; 128 break; 129 130 case TAG: 131 switch (c) { 132 case '"': 133 state = State.STRING; 134 break; 135 case '>': 136 state = State.TEXT; 137 break; 138 } 139 break; |