--- old/make/CompileModuleTools.gmk 2020-03-23 19:56:17.827962840 +0100
+++ new/make/CompileModuleTools.gmk 2020-03-23 19:56:17.475962843 +0100
@@ -37,9 +37,7 @@
$(eval $(call SetupJavaCompilation,BUILD_JIGSAW_TOOLS, \
SETUP := GENERATE_USINGJDKBYTECODE, \
SRC := $(TOPDIR)/make/jdk/src/classes, \
- INCLUDES := build/tools/deps \
- build/tools/docs \
- build/tools/jigsaw, \
+ INCLUDES := build/tools/jigsaw, \
COPY := .properties .html, \
BIN := $(TOOLS_CLASSES_DIR), \
DISABLED_WARNINGS := fallthrough, \
--- old/make/CompileToolsJdk.gmk 2020-03-23 19:56:18.591962835 +0100
+++ new/make/CompileToolsJdk.gmk 2020-03-23 19:56:18.259962837 +0100
@@ -27,6 +27,7 @@
include $(SPEC)
include MakeBase.gmk
+include Modules.gmk
include JavaCompilation.gmk
include SetupJavaCompilers.gmk
include TextFileProcessing.gmk
@@ -39,6 +40,7 @@
# Use += to be able to add to this from a custom extension
BUILD_TOOLS_SRC_DIRS += \
+ $(call FindAllToolsDirs) \
$(TOPDIR)/make/jdk/src/classes \
$(BUILDTOOLS_OUTPUTDIR)/interim_tzdb_classes \
#
@@ -48,10 +50,9 @@
SRC := $(BUILD_TOOLS_SRC_DIRS), \
EXCLUDES := \
build/tools/classlist \
- build/tools/deps \
- build/tools/docs \
build/tools/jigsaw \
build/tools/depend \
+ org/openjdk/buildtools/symbolgenerator \
, \
BIN := $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes, \
ADD_JAVAC_FLAGS := \
@@ -62,10 +63,11 @@
TARGETS += $(BUILD_TOOLS_JDK)
-$(eval $(call SetupCopyFiles,COPY_NIMBUS_TEMPLATES, \
+$(eval $(call SetupCopyFiles, COPY_NIMBUS_TEMPLATES, \
SRC := $(TOPDIR)/src/java.desktop/share/classes/javax/swing/plaf/nimbus, \
- DEST := $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes/build/tools/generatenimbus/resources, \
- FILES := $(wildcard $(TOPDIR)/src/java.desktop/share/classes/javax/swing/plaf/nimbus/*.template)))
+ DEST := $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes/org/openjdk/buildtools/generatenimbus/resources, \
+ FILES := $(wildcard $(TOPDIR)/src/java.desktop/share/classes/javax/swing/plaf/nimbus/*.template), \
+))
TARGETS += $(COPY_NIMBUS_TEMPLATES)
--- old/make/CopyInterimTZDB.gmk 2020-03-23 19:56:19.299962830 +0100
+++ new/make/CopyInterimTZDB.gmk 2020-03-23 19:56:18.959962832 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -35,15 +35,16 @@
define tzdb_copyfiles
$(call MakeTargetDir)
$(RM) '$@'
- $(SED) -e "s/package java.time.zone/package build.tools.tzdb/" \
+ $(SED) -e "s/package java.time.zone/package org.openjdk.buildtools.tzdb/" \
< $(<) > $@
endef
-$(eval $(call SetupCopyFiles,COPY_INTERIM_TZDB, \
+$(eval $(call SetupCopyFiles, COPY_INTERIM_TZDB, \
SRC := $(TOPDIR)/src/java.base/share/classes/java/time/zone, \
- DEST := $(BUILDTOOLS_OUTPUTDIR)/interim_tzdb_classes/build/tools/tzdb, \
+ DEST := $(BUILDTOOLS_OUTPUTDIR)/interim_tzdb_classes/org/openjdk/buildtools/tzdb, \
FILES := ZoneRules.java ZoneOffsetTransition.java ZoneOffsetTransitionRule.java Ser.java, \
- MACRO := tzdb_copyfiles))
+ MACRO := tzdb_copyfiles, \
+))
##########################################################################################
--- old/make/ToolsJdk.gmk 2020-03-23 19:56:20.071962824 +0100
+++ new/make/ToolsJdk.gmk 2020-03-23 19:56:19.739962826 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -40,89 +40,23 @@
################################################################################
-TOOL_COMPILEFONTCONFIG = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- --add-exports java.desktop/sun.awt=ALL-UNNAMED \
- build.tools.compilefontconfig.CompileFontConfig
-
+# used by build system for many modules
TOOL_COMPILEPROPERTIES = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
build.tools.compileproperties.CompileProperties
-TOOL_GENERATECHARACTER = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- build.tools.generatecharacter.GenerateCharacter
-
-TOOL_CHARACTERNAME = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- build.tools.generatecharacter.CharacterName
-
-TOOL_DTDBUILDER = $(JAVA_SMALL) -Ddtd_home=$(TOPDIR)/make/data/dtdbuilder \
- -Djava.awt.headless=true \
- -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes build.tools.dtdbuilder.DTDBuilder
-
-TOOL_GENERATEBREAKITERATORDATA = $(JAVA_SMALL) \
- -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- build.tools.generatebreakiteratordata.GenerateBreakIteratorData
-
-TOOL_GENERATECURRENCYDATA = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- build.tools.generatecurrencydata.GenerateCurrencyData
-
-TOOL_TZDB = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- build.tools.tzdb.TzdbZoneRulesCompiler
-
-TOOL_BLACKLISTED_CERTS = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- build.tools.blacklistedcertsconverter.BlacklistedCertsConverter
-
-TOOL_MAKEJAVASECURITY = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- build.tools.makejavasecurity.MakeJavaSecurity
-
-TOOL_GENERATECACERTS = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- build.tools.generatecacerts.GenerateCacerts
-
-TOOL_GENERATEEMOJIDATA = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- build.tools.generateemojidata.GenerateEmojiData
-
-
-# TODO: There are references to the jdwpgen.jar in jdk/make/netbeans/jdwpgen/build.xml
-# and nbproject/project.properties in the same dir. Needs to be looked at.
-TOOL_JDWPGEN = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes build.tools.jdwpgen.Main
-
-# TODO: Lots of files in jdk/make/tools/CharsetMapping dir
+# shared by java.base and jdk.charsets
TOOL_CHARSETMAPPING = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
build.tools.charsetmapping.Main $(LOG_INFO)
-TOOL_SPP = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes build.tools.spp.Spp
-
-# Nimbus is used somewhere in the swing build.
-TOOL_GENERATENIMBUS = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- build.tools.generatenimbus.Generator
-
-TOOL_WRAPPERGENERATOR = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- build.tools.x11wrappergen.WrapperGenerator
-
-TOOL_AWT_TOBIN = $(JAVA_SMALL) -Djava.awt.headless=true -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- build.tools.icondata.awt.ToBin
-
-TOOL_OSX_TOBIN = $(JAVA_SMALL) -Djava.awt.headless=true -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- build.tools.icondata.osxapp.ToBin
-
+# shared by java.base and jdk.localedata
TOOL_CLDRCONVERTER = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
build.tools.cldrconverter.CLDRConverter
-TOOL_INTPOLY = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- build.tools.intpoly.FieldGen
-
-TOOL_GENERATELSREQUIVMAPS = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- build.tools.generatelsrequivmaps.EquivMapsGenerator
-
-TOOL_GENMODULEINFOSOURCE = $(JAVA_SMALL) $(INTERIM_LANGTOOLS_BOOTCLASSPATH) \
- -cp $(call PathList, $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes) \
+# used by build system for all modules
+TOOL_GENMODULEINFOSOURCE = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
build.tools.module.GenModuleInfoSource
-TOOL_GENCLASSLOADERMAP = $(JAVA_SMALL) $(INTERIM_LANGTOOLS_BOOTCLASSPATH) \
- -cp $(call PathList, $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes) \
- build.tools.module.GenModuleLoaderMap
-
-TOOL_PUBLICSUFFIXLIST = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
- build.tools.publicsuffixlist.GeneratePublicSuffixList
-
+# used by build system for docs
TOOL_FIXUPPANDOC = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
build.tools.fixuppandoc.Main
--- old/make/UpdateX11Wrappers.gmk 2020-03-23 19:56:20.843962818 +0100
+++ new/make/UpdateX11Wrappers.gmk 2020-03-23 19:56:20.515962821 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -47,6 +47,9 @@
$(error It is not possible to update the x11wrappers when cross-compiling)
endif
+TOOL_WRAPPERGENERATOR = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.x11wrappergen.WrapperGenerator
+
X11WRAPPERS_OUTPUT := $(SUPPORT_OUTPUTDIR)/x11wrappers
GENERATOR_SOURCE_FILE := $(X11WRAPPERS_OUTPUT)/src/data_generator.c
--- old/make/common/Modules.gmk 2020-03-23 19:56:21.595962813 +0100
+++ new/make/common/Modules.gmk 2020-03-23 19:56:21.275962815 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -203,7 +203,7 @@
# The native dynamic libraries in these modules will also get built into static
# libraries for consumption by downstream projects that need to statically link
# the JDK libraries. Those static libraries are not part of the main JDK
-# distribution.
+# distribution.
STATIC_LIBS_MODULES := \
java.base \
jdk.crypto.ec \
@@ -274,6 +274,8 @@
MAN_SUBDIRS += share/man
+TOOLS_SUBDIRS += share/tools
+
# Find all module-info.java files for the current build target platform and
# configuration.
# Param 1 - Module to find for, set to * for finding all
@@ -338,6 +340,13 @@
$(strip $(wildcard \
$(foreach sub, $(MAN_SUBDIRS), $(addsuffix /$(strip $1)/$(sub), $(TOP_SRC_DIRS)))))
+# Find all tools directories for all modules for the current build target platform and
+# configuration.
+FindAllToolsDirs = \
+ $(sort $(wildcard \
+ $(foreach sub, $(TOOLS_SUBDIRS), \
+ $(patsubst %,%/*/$(sub), $(TOP_SRC_DIRS)))))
+
# Construct the complete module source path
GetModuleSrcPath = \
$(call PathList, \
--- old/make/data/symbols/symbols 2020-03-23 19:56:22.383962807 +0100
+++ new/make/data/symbols/symbols 2020-03-23 19:56:22.035962809 +0100
@@ -27,7 +27,7 @@
# ##########################################################
#
#command used to generate this file:
-#build.tools.symbolgenerator.CreateSymbols build-description-incremental symbols include.list
+#org.openjdk.buildtools.symbolgenerator.CreateSymbols build-description-incremental symbols include.list
#
generate platforms 7:8:9:A:B:C:D:E
platform version 8 files java.activation-8.sym.txt:java.base-8.sym.txt:java.compiler-8.sym.txt:java.corba-8.sym.txt:java.datatransfer-8.sym.txt:java.desktop-8.sym.txt:java.instrument-8.sym.txt:java.logging-8.sym.txt:java.management-8.sym.txt:java.management.rmi-8.sym.txt:java.naming-8.sym.txt:java.prefs-8.sym.txt:java.rmi-8.sym.txt:java.scripting-8.sym.txt:java.security.jgss-8.sym.txt:java.security.sasl-8.sym.txt:java.sql-8.sym.txt:java.sql.rowset-8.sym.txt:java.transaction-8.sym.txt:java.xml-8.sym.txt:java.xml.bind-8.sym.txt:java.xml.crypto-8.sym.txt:java.xml.ws-8.sym.txt:java.xml.ws.annotation-8.sym.txt:jdk.httpserver-8.sym.txt:jdk.management-8.sym.txt:jdk.scripting.nashorn-8.sym.txt:jdk.sctp-8.sym.txt:jdk.security.auth-8.sym.txt:jdk.security.jgss-8.sym.txt
--- old/make/gendata/Gendata-java.base.gmk 2020-03-23 19:56:23.131962801 +0100
+++ new/make/gendata/Gendata-java.base.gmk 2020-03-23 19:56:22.815962804 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -40,6 +40,9 @@
################################################################################
+TOOL_CHARACTERNAME = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.generatecharacter.CharacterName
+
GENDATA_UNINAME := $(JDK_OUTPUTDIR)/modules/java.base/java/lang/uniName.dat
$(GENDATA_UNINAME): $(TOPDIR)/make/data/unicodedata/UnicodeData.txt $(BUILD_TOOLS_JDK)
@@ -52,6 +55,9 @@
GENDATA_CURDATA := $(JDK_OUTPUTDIR)/modules/java.base/java/util/currency.data
+TOOL_GENERATECURRENCYDATA = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.generatecurrencydata.GenerateCurrencyData
+
$(GENDATA_CURDATA): $(TOPDIR)/make/data/currency/CurrencyData.properties $(BUILD_TOOLS_JDK)
$(call MakeDir, $(@D))
$(RM) $@
@@ -63,6 +69,9 @@
################################################################################
+TOOL_GENERATECACERTS = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.generatecacerts.GenerateCacerts
+
GENDATA_CACERTS_SRC := $(TOPDIR)/make/data/cacerts/
GENDATA_CACERTS := $(SUPPORT_OUTPUTDIR)/modules_libs/java.base/security/cacerts
@@ -88,6 +97,9 @@
# RESTRICTED_PKGS_SRC is optionally set in custom extension for this makefile
+TOOL_MAKEJAVASECURITY = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.makejavasecurity.MakeJavaSecurity
+
$(GENDATA_JAVA_SECURITY): $(BUILD_TOOLS_JDK) $(GENDATA_JAVA_SECURITY_SRC) $(RESTRICTED_PKGS_SRC)
$(call LogInfo, Generating java.security)
$(call MakeTargetDir)
--- old/make/gendata/Gendata-jdk.compiler.gmk 2020-03-23 19:56:23.907962796 +0100
+++ new/make/gendata/Gendata-jdk.compiler.gmk 2020-03-23 19:56:23.567962798 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -54,9 +54,9 @@
$(eval $(call SetupJavaCompilation, COMPILE_CREATE_SYMBOLS, \
SETUP := GENERATE_OLDBYTECODE, \
- SRC := $(TOPDIR)/make/langtools/src/classes \
+ SRC := $(TOPDIR)/src/jdk.compiler/share/tools \
$(TOPDIR)/src/jdk.jdeps/share/classes, \
- INCLUDES := build/tools/symbolgenerator com/sun/tools/classfile, \
+ INCLUDES := org/openjdk/buildtools/symbolgenerator com/sun/tools/classfile, \
BIN := $(BUILDTOOLS_OUTPUTDIR)/create_symbols, \
ADD_JAVAC_FLAGS := $(INTERIM_LANGTOOLS_ARGS) \
--patch-module java.base=$(BUILDTOOLS_OUTPUTDIR)/gensrc/java.base.interim \
@@ -73,14 +73,14 @@
$(JAVA_SMALL) $(INTERIM_LANGTOOLS_ARGS) \
$(COMPILECREATESYMBOLS_ADD_EXPORTS) \
-classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols \
- build.tools.symbolgenerator.CreateSymbols \
+ org.openjdk.buildtools.symbolgenerator.CreateSymbols \
build-ctsym \
$(CT_DATA_DESCRIPTION) \
$(@D)
$(JAVA_SMALL) $(INTERIM_LANGTOOLS_ARGS) \
$(COMPILECREATESYMBOLS_ADD_EXPORTS) \
-classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols \
- build.tools.symbolgenerator.TransitiveDependencies \
+ org.openjdk.buildtools.symbolgenerator.TransitiveDependencies \
$(@D) \
$(CT_MODULESOURCEPATH) \
$(CT_MODULES)
--- old/make/gendata/GendataBlacklistedCerts.gmk 2020-03-23 19:56:24.687962790 +0100
+++ new/make/gendata/GendataBlacklistedCerts.gmk 2020-03-23 19:56:24.343962792 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,9 @@
$(eval $(call IncludeCustomExtension, gendata/GendataBlacklistedCerts.gmk))
+TOOL_BLACKLISTED_CERTS = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.blacklistedcertsconverter.BlacklistedCertsConverter
+
GENDATA_BLACKLISTED_CERTS_SRC += $(TOPDIR)/make/data/blacklistedcertsconverter/blacklisted.certs.pem
GENDATA_BLACKLISTED_CERTS := $(SUPPORT_OUTPUTDIR)/modules_libs/$(MODULE)/security/blacklisted.certs
--- old/make/gendata/GendataBreakIterator.gmk 2020-03-23 19:56:25.427962784 +0100
+++ new/make/gendata/GendataBreakIterator.gmk 2020-03-23 19:56:25.095962787 +0100
@@ -1,5 +1,5 @@
-# Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -72,6 +72,10 @@
# input
UNICODEDATA := $(TOPDIR)/make/data/unicodedata/UnicodeData.txt
+TOOL_GENERATEBREAKITERATORDATA = $(JAVA_SMALL) \
+ -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.generatebreakiteratordata.GenerateBreakIteratorData
+
# output
BASE_DATA_PKG_DIR := $(JDK_OUTPUTDIR)/modules/java.base/sun/text/resources
LD_DATA_PKG_DIR := $(JDK_OUTPUTDIR)/modules/jdk.localedata/sun/text/resources/ext
--- old/make/gendata/GendataFontConfig.gmk 2020-03-23 19:56:26.171962779 +0100
+++ new/make/gendata/GendataFontConfig.gmk 2020-03-23 19:56:25.839962781 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -23,6 +23,10 @@
# questions.
#
+TOOL_COMPILEFONTCONFIG = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ --add-exports java.desktop/sun.awt=ALL-UNNAMED \
+ org.openjdk.buildtools.compilefontconfig.CompileFontConfig
+
GENDATA_FONT_CONFIG_DST := $(SUPPORT_OUTPUTDIR)/modules_libs/$(MODULE)
GENDATA_FONT_CONFIG_DATA_DIR ?= $(TOPDIR)/make/data/fontconfig
--- old/make/gendata/GendataHtml32dtd.gmk 2020-03-23 19:56:26.875962774 +0100
+++ new/make/gendata/GendataHtml32dtd.gmk 2020-03-23 19:56:26.555962776 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -23,6 +23,10 @@
# questions.
#
+TOOL_DTDBUILDER = $(JAVA_SMALL) -Ddtd_home=$(TOPDIR)/make/data/dtdbuilder \
+ -Djava.awt.headless=true \
+ -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes org.openjdk.buildtools.dtdbuilder.DTDBuilder
+
GENDATA_HTML32DTD :=
HTML32DTD = $(JDK_OUTPUTDIR)/modules/java.desktop/javax/swing/text/html/parser/html32.bdtd
--- old/make/gendata/GendataPublicSuffixList.gmk 2020-03-23 19:56:27.615962768 +0100
+++ new/make/gendata/GendataPublicSuffixList.gmk 2020-03-23 19:56:27.279962771 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,9 @@
include $(SPEC)
+TOOL_PUBLICSUFFIXLIST = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.publicsuffixlist.GeneratePublicSuffixList
+
GENDATA_PUBLICSUFFIXLIST_SRC += $(TOPDIR)/make/data/publicsuffixlist/public_suffix_list.dat
GENDATA_PUBLICSUFFIXLIST := $(SUPPORT_OUTPUTDIR)/modules_libs/$(MODULE)/security/public_suffix_list.dat
--- old/make/gendata/GendataTZDB.gmk 2020-03-23 19:56:28.383962763 +0100
+++ new/make/gendata/GendataTZDB.gmk 2020-03-23 19:56:28.047962765 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,8 @@
GENDATA_TZDB :=
+TOOL_TZDB = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.tzdb.TzdbZoneRulesCompiler
#
# Time zone data file creation
#
--- old/make/gensrc/Gensrc-java.base.gmk 2020-03-23 19:56:29.139962757 +0100
+++ new/make/gensrc/Gensrc-java.base.gmk 2020-03-23 19:56:28.815962759 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,10 @@
include GensrcCommonJdk.gmk
+# Used by GensrcBuffer.gmk, GensrcCharsetCoder.gmk and GensrcVarHandles.gmk
+TOOL_SPP = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.spp.Spp
+
include GensrcLocaleData.gmk
include GensrcCharacterData.gmk
include GensrcMisc.gmk
@@ -90,6 +94,9 @@
################################################################################
+TOOL_GENERATELSREQUIVMAPS = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.generatelsrequivmaps.EquivMapsGenerator
+
GENSRC_LSREQUIVMAPS := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/sun/util/locale/LocaleEquivalentMaps.java
$(GENSRC_LSREQUIVMAPS): $(TOPDIR)/make/data/lsrdata/language-subtag-registry.txt $(BUILD_TOOLS_JDK)
@@ -100,8 +107,11 @@
################################################################################
+TOOL_INTPOLY = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.intpoly.FieldGen
+
INTPOLY_GEN_DONE := $(GENSRC_DIR)/_intpoly-gensrc.marker
-INTPOLY_HEADER := $(TOPDIR)/make/jdk/src/classes/build/tools/intpoly/header.txt
+INTPOLY_HEADER := $(TOPDIR)/src/java.base/share/tools/org/openjdk/buildtools/intpoly/header.txt
-$(INTPOLY_GEN_DONE): $(INTPLOY_HEADER) $(BUILD_TOOLS_JDK)
+$(INTPOLY_GEN_DONE): $(INTPOLY_HEADER) $(BUILD_TOOLS_JDK)
$(call MakeDir, $(GENSRC_DIR))
$(call LogInfo, Generating fixed-field math classes for java.base)
--- old/make/gensrc/Gensrc-jdk.jdi.gmk 2020-03-23 19:56:29.871962752 +0100
+++ new/make/gensrc/Gensrc-jdk.jdi.gmk 2020-03-23 19:56:29.575962754 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,9 @@
# Translate the Java debugger wire protocol (jdwp.spec) file into a JDWP.java file
# and a JDWPCommands.h C-header file.
+TOOL_JDWPGEN = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.jdwpgen.Main
+
JDWP_SPEC_FILE := $(TOPDIR)/make/data/jdwp/jdwp.spec
HEADER_FILE := $(SUPPORT_OUTPUTDIR)/headers/jdk.jdwp.agent/JDWPCommands.h
JAVA_FILE := $(SUPPORT_OUTPUTDIR)/gensrc/jdk.jdi/com/sun/tools/jdi/JDWP.java
--- old/make/gensrc/GensrcCharacterData.gmk 2020-03-23 19:56:30.575962746 +0100
+++ new/make/gensrc/GensrcCharacterData.gmk 2020-03-23 19:56:30.279962749 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,9 @@
# Rules to create $(SUPPORT_OUTPUTDIR)/gensrc/java.base/sun/lang/CharacterData*.java
#
+TOOL_GENERATECHARACTER = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.generatecharacter.GenerateCharacter
+
GENSRC_CHARACTERDATA :=
CHARACTERDATA = $(TOPDIR)/make/data/characterdata
--- old/make/gensrc/GensrcEmojiData.gmk 2020-03-23 19:56:31.343962741 +0100
+++ new/make/gensrc/GensrcEmojiData.gmk 2020-03-23 19:56:31.011962743 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,9 @@
# Rules to create $(SUPPORT_OUTPUTDIR)/gensrc/java.base/java/util/regex/EmojiData.java
#
+TOOL_GENERATEEMOJIDATA = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.generateemojidata.GenerateEmojiData
+
GENSRC_EMOJIDATA := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/java/util/regex/EmojiData.java
EMOJIDATATEMP = $(TOPDIR)/src/java.base/share/classes/java/util/regex/EmojiData.java.template
--- old/make/gensrc/GensrcIcons.gmk 2020-03-23 19:56:32.103962735 +0100
+++ new/make/gensrc/GensrcIcons.gmk 2020-03-23 19:56:31.779962737 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -70,6 +70,10 @@
################################################################################
+TOOL_AWT_TOBIN = $(JAVA_SMALL) -Djava.awt.headless=true \
+ -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.icondataawt.ToBin
+
define SetupGensrcAWTIcon
# param 1 is for src-file
# param 2 is for src-dir
@@ -109,6 +113,9 @@
################################################################################
ifeq ($(call isTargetOs, macosx), true)
+ TOOL_OSX_TOBIN = $(JAVA_SMALL) -Djava.awt.headless=true \
+ -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.icondataosx.ToBin
GENSRC_OSX_ICONS_DST := $(SUPPORT_OUTPUTDIR)/headers/java.desktop
GENSRC_OSX_ICONS := $(GENSRC_OSX_ICONS_DST)/AWTIconData.h
--- old/make/gensrc/GensrcModuleLoaderMap.gmk 2020-03-23 19:56:32.871962729 +0100
+++ new/make/gensrc/GensrcModuleLoaderMap.gmk 2020-03-23 19:56:32.535962732 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -41,6 +41,10 @@
VARDEPS_VALUE := $(BOOT_MODULES_LIST) $(PLATFORM_MODULES_LIST)
VARDEPS_FILE := $(call DependOnVariable, VARDEPS_VALUE)
+TOOL_GENCLASSLOADERMAP = $(JAVA_SMALL) \
+ -cp $(call PathList, $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes) \
+ org.openjdk.buildtools.moduleloadermap.GenModuleLoaderMap
+
############################################################################
$(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/module/ModuleLoaderMap.java: \
--- old/make/gensrc/GensrcSwing.gmk 2020-03-23 19:56:33.603962724 +0100
+++ new/make/gensrc/GensrcSwing.gmk 2020-03-23 19:56:33.303962726 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,10 @@
#
# Generate java files for javax.swing.plaf package
#
+
+TOOL_GENERATENIMBUS = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.generatenimbus.Generator
+
NIMBUS_PACKAGE = javax.swing.plaf
NIMBUS_GENSRC_DIR = $(SUPPORT_OUTPUTDIR)/gensrc/java.desktop/javax/swing/plaf/nimbus
NIMBUS_SKIN_FILE = $(TOPDIR)/src/java.desktop/share/classes/javax/swing/plaf/nimbus/skin.laf
--- old/make/gensrc/GensrcX11Wrappers.gmk 2020-03-23 19:56:34.327962719 +0100
+++ new/make/gensrc/GensrcX11Wrappers.gmk 2020-03-23 19:56:34.031962721 +0100
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,9 @@
# Generate java sources using the X11 offsets that are precalculated in files
# make/data/x11wrappergen/sizes-<address size>.txt.
+TOOL_WRAPPERGENERATOR = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ org.openjdk.buildtools.x11wrappergen.WrapperGenerator
+
# Put the generated Java classes used to interface X11 from awt here.
GENSRC_X11WRAPPERS_OUTPUTDIR := $(SUPPORT_OUTPUTDIR)/gensrc/java.desktop/sun/awt/X11
--- old/make/langtools/test/sym/CreateSymbolsTest.java 2020-03-23 19:56:35.071962713 +0100
+++ new/make/langtools/test/sym/CreateSymbolsTest.java 2020-03-23 19:56:34.731962716 +0100
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -56,7 +56,7 @@
Path compileDir = testClasses.resolve("data");
deleteRecursively(compileDir);
Files.createDirectories(compileDir);
- Path createSymbols = findFile("../../make/src/classes/build/tools/symbolgenerator/CreateSymbols.java");
+ Path createSymbols = findFile("../../src/jdk.compiler/share/tools/org/openjdk/buildtools/symbolgenerator/CreateSymbols.java");
if (createSymbols == null) {
System.err.println("Warning: cannot find CreateSymbols, skipping.");
--- old/make/langtools/test/sym/CreateSymbolsTestImpl.java 2020-03-23 19:56:35.847962707 +0100
+++ new/make/langtools/test/sym/CreateSymbolsTestImpl.java 2020-03-23 19:56:35.507962710 +0100
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -41,12 +41,12 @@
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
-import build.tools.symbolgenerator.CreateSymbols;
-import build.tools.symbolgenerator.CreateSymbols.ClassDescription;
-import build.tools.symbolgenerator.CreateSymbols.ClassList;
-import build.tools.symbolgenerator.CreateSymbols.CtSymKind;
-import build.tools.symbolgenerator.CreateSymbols.ExcludeIncludeList;
-import build.tools.symbolgenerator.CreateSymbols.VersionDescription;
+import org.openjdk.buildtools.symbolgenerator.CreateSymbols;
+import org.openjdk.buildtools.symbolgenerator.CreateSymbols.ClassDescription;
+import org.openjdk.buildtools.symbolgenerator.CreateSymbols.ClassList;
+import org.openjdk.buildtools.symbolgenerator.CreateSymbols.CtSymKind;
+import org.openjdk.buildtools.symbolgenerator.CreateSymbols.ExcludeIncludeList;
+import org.openjdk.buildtools.symbolgenerator.CreateSymbols.VersionDescription;
public class CreateSymbolsTestImpl {
--- old/make/scripts/generate-symbol-data.sh 2020-03-23 19:56:36.599962702 +0100
+++ new/make/scripts/generate-symbol-data.sh 2020-03-23 19:56:36.267962704 +0100
@@ -1,6 +1,6 @@
#!/bin/sh
#
-# Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -73,5 +73,5 @@
--add-exports jdk.compiler/com.sun.tools.javac.jvm=ALL-UNNAMED \
--add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED \
--add-modules jdk.jdeps \
- ../../../make/langtools/src/classes/build/tools/symbolgenerator/CreateSymbols.java \
+ ../../../src/jdk.compiler/share/tools/org/openjdk/buildtools/symbolgenerator/CreateSymbols.java \
build-description-incremental symbols include.list
--- old/make/jdk/src/classes/build/tools/blacklistedcertsconverter/BlacklistedCertsConverter.java 2020-03-23 19:56:37.415962696 +0100
+++ /dev/null 2020-02-11 10:29:13.086348146 +0100
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation. Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package build.tools.blacklistedcertsconverter;
-
-import java.security.MessageDigest;
-import java.security.cert.Certificate;
-import java.security.cert.CertificateFactory;
-import java.security.cert.X509Certificate;
-import java.util.Collection;
-import java.util.Set;
-import java.util.TreeSet;
-
-
-/**
- * Converts blacklisted.certs.pem from System.in to blacklisted.certs in
- * System.out. The input must start with a #! line including the fingerprint
- * algorithm. The output is sorted and unique.
- */
-public class BlacklistedCertsConverter {
-
- public static void main(String[] args) throws Exception {
-
- byte[] pattern = "#! java BlacklistedCertsConverter ".getBytes();
- String mdAlg = "";
-
- for (int i=0; ; i++) {
- int n = System.in.read();
- if (n < 0) {
- throw new Exception("Unexpected EOF");
- }
- if (i < pattern.length) {
- if (n != pattern[i]) {
- throw new Exception("The first line must start with \""
- + new String(pattern) + "\"");
- }
- } else if (i < pattern.length + 100) {
- if (n < 32) {
- break;
- } else {
- mdAlg = mdAlg + String.format("%c", n);
- }
- }
- }
-
- mdAlg = mdAlg.trim();
- System.out.println("Algorithm=" + mdAlg);
-
- CertificateFactory cf = CertificateFactory.getInstance("X.509");
- Collection<? extends Certificate> certs
- = cf.generateCertificates(System.in);
-
- // Output sorted so that it's easy to locate an entry.
- Set<String> fingerprints = new TreeSet<>();
- for (Certificate cert: certs) {
- fingerprints.add(
- getCertificateFingerPrint(mdAlg, (X509Certificate)cert));
- }
-
- for (String s: fingerprints) {
- System.out.println(s);
- }
- }
-
- /**
- * Converts a byte to hex digit and writes to the supplied buffer
- */
- private static void byte2hex(byte b, StringBuffer buf) {
- char[] hexChars = { '0', '1', '2', '3', '4', '5', '6', '7', '8',
- '9', 'A', 'B', 'C', 'D', 'E', 'F' };
- int high = ((b & 0xf0) >> 4);
- int low = (b & 0x0f);
- buf.append(hexChars[high]);
- buf.append(hexChars[low]);
- }
-
- /**
- * Gets the requested finger print of the certificate.
- */
- private static String getCertificateFingerPrint(
- String mdAlg, X509Certificate cert) throws Exception {
- byte[] encCertInfo = cert.getEncoded();
- MessageDigest md = MessageDigest.getInstance(mdAlg);
- byte[] digest = md.digest(encCertInfo);
- StringBuffer buf = new StringBuffer();
- for (int i = 0; i < digest.length; i++) {
- byte2hex(digest[i], buf);
- }
- return buf.toString();
- }
-}
--- /dev/null 2020-02-11 10:29:13.086348146 +0100
+++ new/src/java.base/share/tools/org/openjdk/buildtools/blacklistedcertsconverter/BlacklistedCertsConverter.java 2020-03-23 19:56:37.003962699 +0100
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package org.openjdk.buildtools.blacklistedcertsconverter;
+
+import java.security.MessageDigest;
+import java.security.cert.Certificate;
+import java.security.cert.CertificateFactory;
+import java.security.cert.X509Certificate;
+import java.util.Collection;
+import java.util.Set;
+import java.util.TreeSet;
+
+
+/**
+ * Converts blacklisted.certs.pem from System.in to blacklisted.certs in
+ * System.out. The input must start with a #! line including the fingerprint
+ * algorithm. The output is sorted and unique.
+ */
+public class BlacklistedCertsConverter {
+
+ public static void main(String[] args) throws Exception {
+
+ byte[] pattern = "#! java BlacklistedCertsConverter ".getBytes();
+ String mdAlg = "";
+
+ for (int i=0; ; i++) {
+ int n = System.in.read();
+ if (n < 0) {
+ throw new Exception("Unexpected EOF");
+ }
+ if (i < pattern.length) {
+ if (n != pattern[i]) {
+ throw new Exception("The first line must start with \""
+ + new String(pattern) + "\"");
+ }
+ } else if (i < pattern.length + 100) {
+ if (n < 32) {
+ break;
+ } else {
+ mdAlg = mdAlg + String.format("%c", n);
+ }
+ }
+ }
+
+ mdAlg = mdAlg.trim();
+ System.out.println("Algorithm=" + mdAlg);
+
+ CertificateFactory cf = CertificateFactory.getInstance("X.509");
+ Collection<? extends Certificate> certs
+ = cf.generateCertificates(System.in);
+
+ // Output sorted so that it's easy to locate an entry.
+ Set<String> fingerprints = new TreeSet<>();
+ for (Certificate cert: certs) {
+ fingerprints.add(
+ getCertificateFingerPrint(mdAlg, (X509Certificate)cert));
+ }
+
+ for (String s: fingerprints) {
+ System.out.println(s);
+ }
+ }
+
+ /**
+ * Converts a byte to hex digit and writes to the supplied buffer
+ */
+ private static void byte2hex(byte b, StringBuffer buf) {
+ char[] hexChars = { '0', '1', '2', '3', '4', '5', '6', '7', '8',
+ '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+ int high = ((b & 0xf0) >> 4);
+ int low = (b & 0x0f);
+ buf.append(hexChars[high]);
+ buf.append(hexChars[low]);
+ }
+
+ /**
+ * Gets the requested finger print of the certificate.
+ */
+ private static String getCertificateFingerPrint(
+ String mdAlg, X509Certificate cert) throws Exception {
+ byte[] encCertInfo = cert.getEncoded();
+ MessageDigest md = MessageDigest.getInstance(mdAlg);
+ byte[] digest = md.digest(encCertInfo);
+ StringBuffer buf = new StringBuffer();
+ for (int i = 0; i < digest.length; i++) {
+ byte2hex(digest[i], buf);
+ }
+ return buf.toString();
+ }
+}
--- old/make/jdk/src/classes/build/tools/generatebreakiteratordata/CharSet.java 2020-03-23 19:56:38.251962690 +0100
+++ /dev/null 2020-02-11 10:29:13.086348146 +0100
@@ -1,819 +0,0 @@
-/*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation. Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/*
- * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
- * (C) Copyright IBM Corp. 1996 - 2002 - All Rights Reserved
- *
- * The original version of this source code and documentation
- * is copyrighted and owned by Taligent, Inc., a wholly-owned
- * subsidiary of IBM. These materials are provided under terms
- * of a License Agreement between Taligent and Sun. This technology
- * is protected by multiple US and International patents.
- *
- * This notice and attribution to Taligent may not be removed.
- * Taligent is a registered trademark of Taligent, Inc.
- */
-
-package build.tools.generatebreakiteratordata;
-
-import java.util.Arrays;
-import java.util.Hashtable;
-
-/**
- * An object representing a set of characters. (This is a "set" in the
- * mathematical sense: an unduplicated list of characters on which set
- * operations such as union and intersection can be performed.) The
- * set information is stored in compressed, optimized form: The object
- * contains an integer array with an even number of characters. Each
- * pair of characters represents a range of characters contained in the set
- * (a pair of the same character represents a single character). The
- * characters are sorted in increasing order.
- */
-class CharSet {
- /**
- * The structure containing the set information. The characters
- * in this array are organized into pairs, each pair representing
- * a range of characters contained in the set
- */
- private int[] chars;
-
- //==========================================================================
- // parseString() and associated routines
- //==========================================================================
- /**
- * A cache which is used to speed up parseString() whenever it is
- * used to parse a description that has been parsed before
- */
- private static Hashtable<String, CharSet> expressionCache = null;
-
- /**
- * Builds a CharSet based on a textual description. For the syntax of
- * the description, see the documentation of RuleBasedBreakIterator.
- * @see java.text.RuleBasedBreakIterator
- */
- public static CharSet parseString(String s) {
- CharSet result = null;
-
- // if "s" is in the expression cache, pull the result out
- // of the expresison cache
- if (expressionCache != null) {
- result = expressionCache.get(s);
- }
-
- // otherwise, use doParseString() to actually parse the string,
- // and then add a corresponding entry to the expression cache
- if (result == null) {
- result = doParseString(s);
- if (expressionCache == null) {
- expressionCache = new Hashtable<>();
- }
- expressionCache.put(s, result);
- }
- result = (CharSet)(result.clone());
- return result;
- }
-
- /**
- * This function is used by parseString() to actually parse the string
- */
- private static CharSet doParseString(String s) {
- CharSet result = new CharSet();
- int p = 0;
-
- boolean haveDash = false;
- boolean haveTilde = false;
- boolean wIsReal = false;
- int w = 0x0000;
-
- // for each character in the description...
- while (p < s.length()) {
- int c = s.codePointAt(p);
-
- // if it's an opening bracket...
- if (c == '[') {
- // flush the single-character cache
- if (wIsReal) {
- result.internalUnion(new CharSet(w));
- }
-
- // locate the matching closing bracket
- int bracketLevel = 1;
- int q = p + 1;
- while (bracketLevel != 0) {
- // if no matching bracket by end of string then...
- if (q >= s.length()) {
- throw new IllegalArgumentException("Parse error at position " + p + " in " + s);
- }
- int ch = s.codePointAt(q);
- switch (ch) {
- case '\\': // need to step over next character
- ch = s.codePointAt(++q);
- break;
- case '[':
- ++bracketLevel;
- break;
- case ']':
- --bracketLevel;
- break;
- }
- q += Character.charCount(ch);
- }
- --q;
-
- // call parseString() recursively to parse the text inside
- // the brackets, then either add or subtract the result from
- // our running result depending on whether or not the []
- // expresison was preceded by a ^
- if (!haveTilde) {
- result.internalUnion(CharSet.parseString(s.substring(p + 1, q)));
- }
- else {
- result.internalDifference(CharSet.parseString(s.substring(p + 1, q)));
- }
- haveTilde = false;
- haveDash = false;
- wIsReal = false;
- p = q + 1;
- }
-
- // if the character is a colon...
- else if (c == ':') {
- // flush the single-character cache
- if (wIsReal) {
- result.internalUnion(new CharSet(w));
- }
-
- // locate the matching colon (and throw an error if there
- // isn't one)
- int q = s.indexOf(':', p + 1);
- if (q == -1) {
- throw new IllegalArgumentException("Parse error at position " + p + " in " + s);
- }
-
- // use charSetForCategory() to parse the text in the colons,
- // and either add or substract the result from our running
- // result depending on whether the :: expression was
- // preceded by a ^
- if (!haveTilde) {
- result.internalUnion(charSetForCategory(s.substring(p + 1, q)));
- }
- else {
- result.internalDifference(charSetForCategory(s.substring(p + 1, q)));
- }
-
- // reset everything and advance to the next character
- haveTilde = false;
- haveDash = false;
- wIsReal = false;
- p = q + 1;
- }
-
- // if the character is a dash, set an appropriate flag
- else if (c == '-') {
- if (wIsReal) {
- haveDash = true;
- }
- ++p;
- }
-
- // if the character is a caret, flush the single-character
- // cache and set an appropriate flag. If the set is empty
- // (i.e., if the expression begins with ^), invert the set
- // (i.e., set it to include everything). The idea here is
- // that a set that includes nothing but ^ expressions
- // means "everything but these things".
- else if (c == '^') {
- if (wIsReal) {
- result.internalUnion(new CharSet(w));
- wIsReal = false;
- }
- haveTilde = true;
- ++p;
- if (result.empty()) {
- result.internalComplement();
- }
- }
-
- // throw an exception on an illegal character
- else if (c >= ' ' && c < '\u007f' && !Character.isLetter((char)c)
- && !Character.isDigit((char)c) && c != '\\') {
- throw new IllegalArgumentException("Parse error at position " + p + " in " + s);
- }
-
- // otherwise, we end up here...
- else {
- // on a backslash, advance to the next character
- if (c == '\\') {
- ++p;
- }
-
- // if the preceding character was a dash, this character
- // defines the end of a range. Add or subtract that range
- // from the running result depending on whether or not it
- // was preceded by a ^
- if (haveDash) {
- if (s.codePointAt(p) < w) {
- throw new IllegalArgumentException("U+" +
- Integer.toHexString(s.codePointAt(p))
- + " is less than U+" + Integer.toHexString(w) + ". Dash expressions "
- + "can't have their endpoints in reverse order.");
- }
-
- int ch = s.codePointAt(p);
- if (!haveTilde) {
- result.internalUnion(new CharSet(w, ch));
- }
- else {
- result.internalDifference(new CharSet(w, ch));
- }
- p += Character.charCount(ch);
- haveDash = false;
- haveTilde = false;
- wIsReal = false;
- }
-
- // if the preceding character was a caret, remove this character
- // from the running result
- else if (haveTilde) {
- w = s.codePointAt(p);
- result.internalDifference(new CharSet(w));
- p += Character.charCount(w);
- haveTilde = false;
- wIsReal = false;
- }
-
- // otherwise, flush the single-character cache and then
- // put this character into the cache
- else if (wIsReal) {
- result.internalUnion(new CharSet(w));
- w = s.codePointAt(p);
- p += Character.charCount(w);
- wIsReal = true;
- } else {
- w = s.codePointAt(p);
- p += Character.charCount(w);
- wIsReal = true;
- }
- }
- }
-
- // finally, flush the single-character cache one last time
- if (wIsReal) {
- result.internalUnion(new CharSet(w));
- }
-
- return result;
- }
-
- /**
- * Creates a CharSet containing all the characters in a particular
- * Unicode category. The text is either a two-character code from
- * the Unicode database or a single character that begins one or more
- * two-character codes.
- */
- private static CharSet charSetForCategory(String category) {
- // throw an exception if we have anything other than one or two
- // characters inside the colons
- if (category.length() == 0 || category.length() >= 3) {
- throw new IllegalArgumentException("Invalid character category: " + category);
- }
-
- // if we have two characters, search the category map for that code
- // and either construct and return a CharSet from the data in the
- // category map or throw an exception
- if (category.length() == 2) {
- for (int i = 0; i < CharacterCategory.categoryNames.length; i++) {
- if (CharacterCategory.categoryNames[i].equals(category)) {
- return new CharSet(CharacterCategory.getCategoryMap(i));
- }
- }
- throw new IllegalArgumentException("Invalid character category: " + category);
- }
-
- // if we have one character, search the category map for codes beginning
- // with that letter, and union together all of the matching sets that
- // we find (or throw an exception if there are no matches)
- else if (category.length() == 1) {
- CharSet result = new CharSet();
- for (int i = 0; i < CharacterCategory.categoryNames.length; i++) {
- if (CharacterCategory.categoryNames[i].startsWith(category)) {
- result = result.union(new CharSet(CharacterCategory.getCategoryMap(i)));
- }
- }
- if (result.empty()) {
- throw new IllegalArgumentException("Invalid character category: " + category);
- }
- else {
- return result;
- }
- }
- return new CharSet(); // should never get here, but to make the compiler happy...
- }
-
- /**
- * Returns a copy of CharSet's expression cache and sets CharSet's
- * expression cache to empty.
- */
- public static Hashtable<String, CharSet> releaseExpressionCache() {
- Hashtable<String, CharSet> result = expressionCache;
- expressionCache = null;
- return result;
- }
-
- //==========================================================================
- // CharSet manipulation
- //==========================================================================
- /**
- * Creates an empty CharSet.
- */
- public CharSet() {
- chars = new int[0];
- }
-
- /**
- * Creates a CharSet containing a single character.
- * @param c The character to put into the CharSet
- */
- public CharSet(int c) {
- chars = new int[2];
- chars[0] = c;
- chars[1] = c;
- }
-
- /**
- * Creates a CharSet containing a range of characters.
- * @param lo The lowest-numbered character to include in the range
- * @param hi The highest-numbered character to include in the range
- */
- public CharSet(int lo, int hi) {
- chars = new int[2];
- if (lo <= hi) {
- chars[0] = lo;
- chars[1] = hi;
- }
- else {
- chars[0] = hi;
- chars[1] = lo;
- }
- }
-
- /**
- * Creates a CharSet, initializing it from the internal storage
- * of another CharSet (this function performs no error checking
- * on "chars", so if it's malformed, undefined behavior will result)
- */
- private CharSet(int[] chars) {
- this.chars = chars;
- }
-
- /**
- * Returns a CharSet representing the union of two CharSets.
- */
- public CharSet union(CharSet that) {
- return new CharSet(doUnion(that.chars));
- }
-
- /**
- * Adds the characters in "that" to this CharSet
- */
- private void internalUnion(CharSet that) {
- chars = doUnion(that.chars);
- }
-
- /**
- * The actual implementation of the union functions
- */
- private int[] doUnion(int[] c2) {
- int[] result = new int[chars.length+c2.length];
-
- int i = 0;
- int j = 0;
- int index = 0;
-
- // consider all the characters in both strings
- while (i < chars.length && j < c2.length) {
- int ub;
-
- // the first character in the result is the lower of the
- // starting characters of the two strings, and "ub" gets
- // set to the upper bound of that range
- if (chars[i] < c2[j]) {
- result[index++] = chars[i];
- ub = chars[++i];
- }
- else {
- result[index++] = c2[j];
- ub = c2[++j];
- }
-
- // for as long as one of our two pointers is pointing to a range's
- // end point, or i is pointing to a character that is less than
- // "ub" plus one (the "plus one" stitches touching ranges together)...
- while (i % 2 == 1 ||
- j % 2 == 1 ||
- (i < chars.length && chars[i] <= ub + 1)) {
-
- // advance i to the first character that is greater than
- // "ub" plus one
- while (i < chars.length && chars[i] <= ub + 1) {
- ++i;
- }
-
- // if i points to the endpoint of a range, update "ub"
- // to that character, or if i points to the start of
- // a range and the endpoint of the preceding range is
- // greater than "ub", update "up" to _that_ character
- if (i % 2 == 1) {
- ub = chars[i];
- }
- else if (i > 0 && chars[i - 1] > ub) {
- ub = chars[i - 1];
- }
-
- // now advance j to the first character that is greater
- // that "ub" plus one
- while (j < c2.length && c2[j] <= ub + 1) {
- ++j;
- }
-
- // if j points to the endpoint of a range, update "ub"
- // to that character, or if j points to the start of
- // a range and the endpoint of the preceding range is
- // greater than "ub", update "up" to _that_ character
- if (j % 2 == 1) {
- ub = c2[j];
- }
- else if (j > 0 && c2[j - 1] > ub) {
- ub = c2[j - 1];
- }
- }
- // when we finally fall out of this loop, we will have stitched
- // together a series of ranges that overlap or touch, i and j
- // will both point to starting points of ranges, and "ub" will
- // be the endpoint of the range we're working on. Write "ub"
- // to the result
- result[index++] = ub;
-
- // loop back around to create the next range in the result
- }
-
- // we fall out to here when we've exhausted all the characters in
- // one of the operands. We can append all of the remaining characters
- // in the other operand without doing any extra work.
- if (i < chars.length) {
- for (int k = i; k < chars.length; k++) {
- result[index++] = chars[k];
- }
- }
- if (j < c2.length) {
- for (int k = j; k < c2.length; k++) {
- result[index++] = c2[k];
- }
- }
-
- if (result.length > index) {
- int[] tmpbuf = new int[index];
- System.arraycopy(result, 0, tmpbuf, 0, index);
- return tmpbuf;
- }
-
- return result;
- }
-
- /**
- * Returns the intersection of two CharSets.
- */
- public CharSet intersection(CharSet that) {
- return new CharSet(doIntersection(that.chars));
- }
-
- /**
- * Removes from this CharSet any characters that aren't also in "that"
- */
- private void internalIntersection(CharSet that) {
- chars = doIntersection(that.chars);
- }
-
- /**
- * The internal implementation of the two intersection functions
- */
- private int[] doIntersection(int[] c2) {
- int[] result = new int[chars.length+c2.length];
-
- int i = 0;
- int j = 0;
- int oldI;
- int oldJ;
- int index = 0;
-
- // iterate until we've exhausted one of the operands
- while (i < chars.length && j < c2.length) {
-
- // advance j until it points to a character that is larger than
- // the one i points to. If this is the beginning of a one-
- // character range, advance j to point to the end
- if (i < chars.length && i % 2 == 0) {
- while (j < c2.length && c2[j] < chars[i]) {
- ++j;
- }
- if (j < c2.length && j % 2 == 0 && c2[j] == chars[i]) {
- ++j;
- }
- }
-
- // if j points to the endpoint of a range, save the current
- // value of i, then advance i until it reaches a character
- // which is larger than the character pointed at
- // by j. All of the characters we've advanced over (except
- // the one currently pointed to by i) are added to the result
- oldI = i;
- while (j % 2 == 1 && i < chars.length && chars[i] <= c2[j]) {
- ++i;
- }
- for (int k = oldI; k < i; k++) {
- result[index++] = chars[k];
- }
-
- // if i points to the endpoint of a range, save the current
- // value of j, then advance j until it reaches a character
- // which is larger than the character pointed at
- // by i. All of the characters we've advanced over (except
- // the one currently pointed to by i) are added to the result
- oldJ = j;
- while (i % 2 == 1 && j < c2.length && c2[j] <= chars[i]) {
- ++j;
- }
- for (int k = oldJ; k < j; k++) {
- result[index++] = c2[k];
- }
-
- // advance i until it points to a character larger than j
- // If it points at the beginning of a one-character range,
- // advance it to the end of that range
- if (j < c2.length && j % 2 == 0) {
- while (i < chars.length && chars[i] < c2[j]) {
- ++i;
- }
- if (i < chars.length && i % 2 == 0 && c2[j] == chars[i]) {
- ++i;
- }
- }
- }
-
- if (result.length > index) {
- int[] tmpbuf = new int[index];
- System.arraycopy(result, 0, tmpbuf, 0, index);
- return tmpbuf;
- }
-
- return result;
- }
-
- /**
- * Returns a CharSet containing all the characters in "this" that
- * aren't also in "that"
- */
- public CharSet difference(CharSet that) {
- return new CharSet(doIntersection(that.doComplement()));
- }
-
- /**
- * Removes from "this" all the characters that are also in "that"
- */
- private void internalDifference(CharSet that) {
- chars = doIntersection(that.doComplement());
- }
-
- /**
- * Returns a CharSet containing all the characters which are not
- * in "this"
- */
- public CharSet complement() {
- return new CharSet(doComplement());
- }
-
- /**
- * Complements "this". All the characters it contains are removed,
- * and all the characters it doesn't contain are added.
- */
- private void internalComplement() {
- chars = doComplement();
- }
-
- /**
- * The internal implementation function for the complement routines
- */
- private int[] doComplement() {
- // the complement of an empty CharSet is one containing everything
- if (empty()) {
- int[] result = new int[2];
- result[0] = 0x0000;
- result[1] = 0x10FFFF;
- return result;
- }
-
- int[] result = new int[chars.length+2];
-
- int i = 0;
- int index = 0;
-
- // the result begins with \u0000 unless the original CharSet does
- if (chars[0] != 0x0000) {
- result[index++] = 0x0000;
- }
-
- // walk through the characters in this CharSet. Append a pair of
- // characters the first of which is one less than the first
- // character we see and the second of which is one plus the second
- // character we see (don't write the first character if it's \u0000,
- // and don't write the second character if it's \uffff.
- while (i < chars.length) {
- if (chars[i] != 0x0000) {
- result[index++] = chars[i] - 1;
- }
- if (chars[i + 1] != 0x10FFFF) {
- result[index++] = chars[i + 1] + 1;
- }
- i += 2;
- }
-
- // add 0x10ffff to the end of the result, unless it was in
- // the original set
- if (chars[i-1] != 0x10FFFF) {
- result[index++] = 0x10FFFF;
- }
-
- if (result.length > index) {
- int[] tmpbuf = new int[index];
- System.arraycopy(result, 0, tmpbuf, 0, index);
- return tmpbuf;
- }
-
- return result;
- }
-
- /**
- * Returns true if this CharSet contains the specified character
- * @param c The character we're testing for set membership
- */
- public boolean contains(int c) {
- // search for the first range endpoint that is greater than or
- // equal to c
- int i = 1;
- while (i < chars.length && chars[i] < c) {
- i += 2;
- }
-
- // if we've walked off the end, we don't contain c
- if (i == chars.length) {
- return false;
- }
-
- // otherwise, we contain c if the beginning of the range is less
- // than or equal to c
- return chars[i - 1] <= c;
- }
-
- /**
- * Returns true if "that" is another instance of CharSet containing
- * the exact same characters as this one
- */
- public boolean equals(Object that) {
- return (that instanceof CharSet) && Arrays.equals(chars, ((CharSet)that).chars);
- }
-
- /**
- * Returns the hash code for this set of characters
- */
- public int hashCode() {
- return Arrays.hashCode(chars);
- }
-
- /**
- * Creates a new CharSet that is equal to this one
- */
- public Object clone() {
- return new CharSet(chars);
- }
-
- /**
- * Returns true if this CharSet contains no characters
- */
- public boolean empty() {
- return chars.length == 0;
- }
-
- /**
- * Returns a textual representation of this CharSet. If the result
- * of calling this function is passed to CharSet.parseString(), it
- * will produce another CharSet that is equal to this one.
- */
- public String toString() {
- StringBuffer result = new StringBuffer();
-
- // the result begins with an opening bracket
- result.append('[');
-
- // iterate through the ranges in the CharSet
- for (int i = 0; i < chars.length; i += 2) {
- // for a range with the same beginning and ending point,
- // output that character
- if (chars[i] == chars[i + 1]) {
- result.append("0x");
- result.append(Integer.toHexString(chars[i]));
- }
-
- // otherwise, output the start and end points of the range
- // separated by a dash
- else {
- result.append("0x");
- result.append(Integer.toHexString(chars[i]));
- result.append("-0x");
- result.append(Integer.toHexString(chars[i + 1]));
- }
- }
-
- // the result ends with a closing bracket
- result.append(']');
- return result.toString();
- }
-
- /**
- * Returns an integer array representing the contents of this CharSet
- * in the same form in which they're stored internally: as pairs
- * of characters representing the start and end points of ranges
- */
- public int[] getRanges() {
- return chars;
- }
-
- /**
- * Returns an Enumeration that will return the ranges of characters
- * contained in this CharSet one at a time
- */
- public Enumeration getChars() {
- return new Enumeration(this);
- }
-
- //==========================================================================
- // CharSet.Enumeration
- //==========================================================================
-
- /**
- * An Enumeration that can be used to extract the character ranges
- * from a CharSet one at a time
- */
- public class Enumeration implements java.util.Enumeration<int[]> {
- /**
- * Initializes a CharSet.Enumeration
- */
- Enumeration(CharSet cs) {
- this.chars = cs.chars;
- p = 0;
- }
-
- /**
- * Returns true if the enumeration hasn't yet returned
- * all the ranges in the CharSet
- */
- public boolean hasMoreElements() {
- return p < chars.length;
- }
-
- /**
- * Returns the next range in the CarSet
- */
- public int[] nextElement() {
- int[] result = new int[2];
- result[0] = chars[p++];
- result[1] = chars[p++];
- return result;
- }
-
- int p;
- int[] chars;
- }
-}
--- /dev/null 2020-02-11 10:29:13.086348146 +0100
+++ new/src/java.base/share/tools/org/openjdk/buildtools/generatebreakiteratordata/CharSet.java 2020-03-23 19:56:37.823962693 +0100
@@ -0,0 +1,819 @@
+/*
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
+ * (C) Copyright IBM Corp. 1996 - 2002 - All Rights Reserved
+ *
+ * The original version of this source code and documentation
+ * is copyrighted and owned by Taligent, Inc., a wholly-owned
+ * subsidiary of IBM. These materials are provided under terms
+ * of a License Agreement between Taligent and Sun. This technology
+ * is protected by multiple US and International patents.
+ *
+ * This notice and attribution to Taligent may not be removed.
+ * Taligent is a registered trademark of Taligent, Inc.
+ */
+
+package org.openjdk.buildtools.generatebreakiteratordata;
+
+import java.util.Arrays;
+import java.util.Hashtable;
+
+/**
+ * An object representing a set of characters. (This is a "set" in the
+ * mathematical sense: an unduplicated list of characters on which set
+ * operations such as union and intersection can be performed.) The
+ * set information is stored in compressed, optimized form: The object
+ * contains an integer array with an even number of characters. Each
+ * pair of characters represents a range of characters contained in the set
+ * (a pair of the same character represents a single character). The
+ * characters are sorted in increasing order.
+ */
+class CharSet {
+ /**
+ * The structure containing the set information. The characters
+ * in this array are organized into pairs, each pair representing
+ * a range of characters contained in the set
+ */
+ private int[] chars;
+
+ //==========================================================================
+ // parseString() and associated routines
+ //==========================================================================
+ /**
+ * Cache mapping a description string to the CharSet parseString() built
+ * for it, or null until the first parse (and after releaseExpressionCache())
+ */
+ private static Hashtable<String, CharSet> expressionCache = null;
+
+ /**
+ * Builds a CharSet based on a textual description. For the syntax of
+ * the description, see the documentation of RuleBasedBreakIterator.
+ * @see java.text.RuleBasedBreakIterator
+ */
+ public static CharSet parseString(String s) {
+ CharSet result = null;
+
+ // if "s" is in the expression cache, pull the result out
+ // of the expression cache
+ if (expressionCache != null) {
+ result = expressionCache.get(s);
+ }
+
+ // otherwise, use doParseString() to actually parse the string,
+ // and then add a corresponding entry to the expression cache
+ if (result == null) {
+ result = doParseString(s);
+ if (expressionCache == null) {
+ expressionCache = new Hashtable<>();
+ }
+ expressionCache.put(s, result);
+ }
+ result = (CharSet)(result.clone());
+ return result;
+ }
+
+ /**
+ * This function is used by parseString() to actually parse the string
+ */
+ private static CharSet doParseString(String s) {
+ CharSet result = new CharSet();
+ int p = 0;
+
+ boolean haveDash = false;
+ boolean haveTilde = false;
+ boolean wIsReal = false;
+ int w = 0x0000;
+
+ // for each character in the description...
+ while (p < s.length()) {
+ int c = s.codePointAt(p);
+
+ // if it's an opening bracket...
+ if (c == '[') {
+ // flush the single-character cache
+ if (wIsReal) {
+ result.internalUnion(new CharSet(w));
+ }
+
+ // locate the matching closing bracket
+ int bracketLevel = 1;
+ int q = p + 1;
+ while (bracketLevel != 0) {
+ // if no matching bracket by end of string then...
+ if (q >= s.length()) {
+ throw new IllegalArgumentException("Parse error at position " + p + " in " + s);
+ }
+ int ch = s.codePointAt(q);
+ switch (ch) {
+ case '\\': // need to step over next character
+ ch = s.codePointAt(++q);
+ break;
+ case '[':
+ ++bracketLevel;
+ break;
+ case ']':
+ --bracketLevel;
+ break;
+ }
+ q += Character.charCount(ch);
+ }
+ --q;
+
+ // call parseString() recursively to parse the text inside
+ // the brackets, then either add or subtract the result from
+ // our running result depending on whether or not the []
+ // expression was preceded by a ^
+ if (!haveTilde) {
+ result.internalUnion(CharSet.parseString(s.substring(p + 1, q)));
+ }
+ else {
+ result.internalDifference(CharSet.parseString(s.substring(p + 1, q)));
+ }
+ haveTilde = false;
+ haveDash = false;
+ wIsReal = false;
+ p = q + 1;
+ }
+
+ // if the character is a colon...
+ else if (c == ':') {
+ // flush the single-character cache
+ if (wIsReal) {
+ result.internalUnion(new CharSet(w));
+ }
+
+ // locate the matching colon (and throw an error if there
+ // isn't one)
+ int q = s.indexOf(':', p + 1);
+ if (q == -1) {
+ throw new IllegalArgumentException("Parse error at position " + p + " in " + s);
+ }
+
+ // use charSetForCategory() to parse the text in the colons,
+ // and either add or subtract the result from our running
+ // result depending on whether the :: expression was
+ // preceded by a ^
+ if (!haveTilde) {
+ result.internalUnion(charSetForCategory(s.substring(p + 1, q)));
+ }
+ else {
+ result.internalDifference(charSetForCategory(s.substring(p + 1, q)));
+ }
+
+ // reset everything and advance to the next character
+ haveTilde = false;
+ haveDash = false;
+ wIsReal = false;
+ p = q + 1;
+ }
+
+ // if the character is a dash, set an appropriate flag
+ else if (c == '-') {
+ if (wIsReal) {
+ haveDash = true;
+ }
+ ++p;
+ }
+
+ // if the character is a caret, flush the single-character
+ // cache and set an appropriate flag. If the set is empty
+ // (i.e., if the expression begins with ^), invert the set
+ // (i.e., set it to include everything). The idea here is
+ // that a set that includes nothing but ^ expressions
+ // means "everything but these things".
+ else if (c == '^') {
+ if (wIsReal) {
+ result.internalUnion(new CharSet(w));
+ wIsReal = false;
+ }
+ haveTilde = true;
+ ++p;
+ if (result.empty()) {
+ result.internalComplement();
+ }
+ }
+
+ // throw an exception on an illegal character
+ else if (c >= ' ' && c < '\u007f' && !Character.isLetter((char)c)
+ && !Character.isDigit((char)c) && c != '\\') {
+ throw new IllegalArgumentException("Parse error at position " + p + " in " + s);
+ }
+
+ // otherwise, we end up here...
+ else {
+ // on a backslash, advance to the next character
+ if (c == '\\') {
+ ++p;
+ }
+
+ // if the preceding character was a dash, this character
+ // defines the end of a range. Add or subtract that range
+ // from the running result depending on whether or not it
+ // was preceded by a ^
+ if (haveDash) {
+ if (s.codePointAt(p) < w) {
+ throw new IllegalArgumentException("U+" +
+ Integer.toHexString(s.codePointAt(p))
+ + " is less than U+" + Integer.toHexString(w) + ". Dash expressions "
+ + "can't have their endpoints in reverse order.");
+ }
+
+ int ch = s.codePointAt(p);
+ if (!haveTilde) {
+ result.internalUnion(new CharSet(w, ch));
+ }
+ else {
+ result.internalDifference(new CharSet(w, ch));
+ }
+ p += Character.charCount(ch);
+ haveDash = false;
+ haveTilde = false;
+ wIsReal = false;
+ }
+
+ // if the preceding character was a caret, remove this character
+ // from the running result
+ else if (haveTilde) {
+ w = s.codePointAt(p);
+ result.internalDifference(new CharSet(w));
+ p += Character.charCount(w);
+ haveTilde = false;
+ wIsReal = false;
+ }
+
+ // otherwise, flush the single-character cache and then
+ // put this character into the cache
+ else if (wIsReal) {
+ result.internalUnion(new CharSet(w));
+ w = s.codePointAt(p);
+ p += Character.charCount(w);
+ wIsReal = true;
+ } else {
+ w = s.codePointAt(p);
+ p += Character.charCount(w);
+ wIsReal = true;
+ }
+ }
+ }
+
+ // finally, flush the single-character cache one last time
+ if (wIsReal) {
+ result.internalUnion(new CharSet(w));
+ }
+
+ return result;
+ }
+
+ /**
+ * Creates a CharSet containing all the characters in a particular
+ * Unicode category. The text is either a two-character code from
+ * the Unicode database or a single character that begins one or more
+ * two-character codes.
+ */
+ private static CharSet charSetForCategory(String category) {
+ // throw an exception if we have anything other than one or two
+ // characters inside the colons
+ if (category.length() == 0 || category.length() >= 3) {
+ throw new IllegalArgumentException("Invalid character category: " + category);
+ }
+
+ // if we have two characters, search the category map for that code
+ // and either construct and return a CharSet from the data in the
+ // category map or throw an exception
+ if (category.length() == 2) {
+ for (int i = 0; i < CharacterCategory.categoryNames.length; i++) {
+ if (CharacterCategory.categoryNames[i].equals(category)) {
+ return new CharSet(CharacterCategory.getCategoryMap(i));
+ }
+ }
+ throw new IllegalArgumentException("Invalid character category: " + category);
+ }
+
+ // if we have one character, search the category map for codes beginning
+ // with that letter, and union together all of the matching sets that
+ // we find (or throw an exception if there are no matches)
+ else if (category.length() == 1) {
+ CharSet result = new CharSet();
+ for (int i = 0; i < CharacterCategory.categoryNames.length; i++) {
+ if (CharacterCategory.categoryNames[i].startsWith(category)) {
+ result = result.union(new CharSet(CharacterCategory.getCategoryMap(i)));
+ }
+ }
+ if (result.empty()) {
+ throw new IllegalArgumentException("Invalid character category: " + category);
+ }
+ else {
+ return result;
+ }
+ }
+ return new CharSet(); // should never get here, but to make the compiler happy...
+ }
+
+ /**
+ * Hands the expression cache itself (not a copy; null if nothing has been
+ * parsed since the last release) to the caller and drops CharSet's reference.
+ */
+ public static Hashtable<String, CharSet> releaseExpressionCache() {
+ Hashtable<String, CharSet> result = expressionCache;
+ expressionCache = null;
+ return result;
+ }
+
+ //==========================================================================
+ // CharSet manipulation
+ //==========================================================================
+ /**
+ * Creates an empty CharSet.
+ */
+ public CharSet() {
+ chars = new int[0];
+ }
+
+ /**
+ * Creates a CharSet containing a single character.
+ * @param c The character to put into the CharSet
+ */
+ public CharSet(int c) {
+ chars = new int[2];
+ chars[0] = c;
+ chars[1] = c;
+ }
+
+ /**
+ * Creates a CharSet containing a range of characters.
+ * @param lo The lowest-numbered character to include in the range
+ * @param hi The highest-numbered character to include in the range
+ */
+ public CharSet(int lo, int hi) {
+ chars = new int[2];
+ if (lo <= hi) {
+ chars[0] = lo;
+ chars[1] = hi;
+ }
+ else {
+ chars[0] = hi;
+ chars[1] = lo;
+ }
+ }
+
+ /**
+ * Creates a CharSet, initializing it from the internal storage
+ * of another CharSet (this function performs no error checking
+ * on "chars", so if it's malformed, undefined behavior will result)
+ */
+ private CharSet(int[] chars) {
+ this.chars = chars;
+ }
+
+ /**
+ * Returns a CharSet representing the union of two CharSets.
+ */
+ public CharSet union(CharSet that) {
+ return new CharSet(doUnion(that.chars));
+ }
+
+ /**
+ * Adds the characters in "that" to this CharSet
+ */
+ private void internalUnion(CharSet that) {
+ chars = doUnion(that.chars);
+ }
+
+ /**
+ * The actual implementation of the union functions
+ */
+ private int[] doUnion(int[] c2) {
+ int[] result = new int[chars.length+c2.length];
+
+ int i = 0;
+ int j = 0;
+ int index = 0;
+
+ // consider all the characters in both strings
+ while (i < chars.length && j < c2.length) {
+ int ub;
+
+ // the first character in the result is the lower of the
+ // starting characters of the two strings, and "ub" gets
+ // set to the upper bound of that range
+ if (chars[i] < c2[j]) {
+ result[index++] = chars[i];
+ ub = chars[++i];
+ }
+ else {
+ result[index++] = c2[j];
+ ub = c2[++j];
+ }
+
+ // for as long as one of our two pointers is pointing to a range's
+ // end point, or i is pointing to a character that is less than
+ // "ub" plus one (the "plus one" stitches touching ranges together)...
+ while (i % 2 == 1 ||
+ j % 2 == 1 ||
+ (i < chars.length && chars[i] <= ub + 1)) {
+
+ // advance i to the first character that is greater than
+ // "ub" plus one
+ while (i < chars.length && chars[i] <= ub + 1) {
+ ++i;
+ }
+
+ // if i points to the endpoint of a range, update "ub"
+ // to that character, or if i points to the start of
+ // a range and the endpoint of the preceding range is
+ // greater than "ub", update "ub" to _that_ character
+ if (i % 2 == 1) {
+ ub = chars[i];
+ }
+ else if (i > 0 && chars[i - 1] > ub) {
+ ub = chars[i - 1];
+ }
+
+ // now advance j to the first character that is greater
+ // than "ub" plus one
+ while (j < c2.length && c2[j] <= ub + 1) {
+ ++j;
+ }
+
+ // if j points to the endpoint of a range, update "ub"
+ // to that character, or if j points to the start of
+ // a range and the endpoint of the preceding range is
+ // greater than "ub", update "ub" to _that_ character
+ if (j % 2 == 1) {
+ ub = c2[j];
+ }
+ else if (j > 0 && c2[j - 1] > ub) {
+ ub = c2[j - 1];
+ }
+ }
+ // when we finally fall out of this loop, we will have stitched
+ // together a series of ranges that overlap or touch, i and j
+ // will both point to starting points of ranges, and "ub" will
+ // be the endpoint of the range we're working on. Write "ub"
+ // to the result
+ result[index++] = ub;
+
+ // loop back around to create the next range in the result
+ }
+
+ // we fall out to here when we've exhausted all the characters in
+ // one of the operands. We can append all of the remaining characters
+ // in the other operand without doing any extra work.
+ if (i < chars.length) {
+ for (int k = i; k < chars.length; k++) {
+ result[index++] = chars[k];
+ }
+ }
+ if (j < c2.length) {
+ for (int k = j; k < c2.length; k++) {
+ result[index++] = c2[k];
+ }
+ }
+
+ if (result.length > index) {
+ int[] tmpbuf = new int[index];
+ System.arraycopy(result, 0, tmpbuf, 0, index);
+ return tmpbuf;
+ }
+
+ return result;
+ }
+
+ /**
+ * Returns the intersection of two CharSets.
+ */
+ public CharSet intersection(CharSet that) {
+ return new CharSet(doIntersection(that.chars));
+ }
+
+ /**
+ * Removes from this CharSet any characters that aren't also in "that"
+ */
+ private void internalIntersection(CharSet that) {
+ chars = doIntersection(that.chars);
+ }
+
+ /**
+ * The internal implementation of the two intersection functions
+ */
+ private int[] doIntersection(int[] c2) {
+ int[] result = new int[chars.length+c2.length];
+
+ int i = 0;
+ int j = 0;
+ int oldI;
+ int oldJ;
+ int index = 0;
+
+ // iterate until we've exhausted one of the operands
+ while (i < chars.length && j < c2.length) {
+
+ // advance j until it points to a character that is larger than
+ // the one i points to. If this is the beginning of a one-
+ // character range, advance j to point to the end
+ if (i < chars.length && i % 2 == 0) {
+ while (j < c2.length && c2[j] < chars[i]) {
+ ++j;
+ }
+ if (j < c2.length && j % 2 == 0 && c2[j] == chars[i]) {
+ ++j;
+ }
+ }
+
+ // if j points to the endpoint of a range, save the current
+ // value of i, then advance i until it reaches a character
+ // which is larger than the character pointed at
+ // by j. All of the characters we've advanced over (except
+ // the one currently pointed to by i) are added to the result
+ oldI = i;
+ while (j % 2 == 1 && i < chars.length && chars[i] <= c2[j]) {
+ ++i;
+ }
+ for (int k = oldI; k < i; k++) {
+ result[index++] = chars[k];
+ }
+
+ // if i points to the endpoint of a range, save the current
+ // value of j, then advance j until it reaches a character
+ // which is larger than the character pointed at
+ // by i. All of the characters we've advanced over (except
+ // the one currently pointed to by i) are added to the result
+ oldJ = j;
+ while (i % 2 == 1 && j < c2.length && c2[j] <= chars[i]) {
+ ++j;
+ }
+ for (int k = oldJ; k < j; k++) {
+ result[index++] = c2[k];
+ }
+
+ // advance i until it points to a character larger than j
+ // If it points at the beginning of a one-character range,
+ // advance it to the end of that range
+ if (j < c2.length && j % 2 == 0) {
+ while (i < chars.length && chars[i] < c2[j]) {
+ ++i;
+ }
+ if (i < chars.length && i % 2 == 0 && c2[j] == chars[i]) {
+ ++i;
+ }
+ }
+ }
+
+ if (result.length > index) {
+ int[] tmpbuf = new int[index];
+ System.arraycopy(result, 0, tmpbuf, 0, index);
+ return tmpbuf;
+ }
+
+ return result;
+ }
+
+ /**
+ * Returns a CharSet containing all the characters in "this" that
+ * aren't also in "that"
+ */
+ public CharSet difference(CharSet that) {
+ return new CharSet(doIntersection(that.doComplement()));
+ }
+
+ /**
+ * Removes from "this" all the characters that are also in "that"
+ */
+ private void internalDifference(CharSet that) {
+ chars = doIntersection(that.doComplement());
+ }
+
+ /**
+ * Returns a CharSet containing all the characters which are not
+ * in "this"
+ */
+ public CharSet complement() {
+ return new CharSet(doComplement());
+ }
+
+ /**
+ * Complements "this". All the characters it contains are removed,
+ * and all the characters it doesn't contain are added.
+ */
+ private void internalComplement() {
+ chars = doComplement();
+ }
+
+ /**
+ * The internal implementation function for the complement routines
+ */
+ private int[] doComplement() {
+ // the complement of an empty CharSet is one containing everything
+ if (empty()) {
+ int[] result = new int[2];
+ result[0] = 0x0000;
+ result[1] = 0x10FFFF;
+ return result;
+ }
+
+ int[] result = new int[chars.length+2];
+
+ int i = 0;
+ int index = 0;
+
+ // the result begins with \u0000 unless the original CharSet does
+ if (chars[0] != 0x0000) {
+ result[index++] = 0x0000;
+ }
+
+ // walk through the characters in this CharSet. Append a pair of
+ // characters the first of which is one less than the first
+ // character we see and the second of which is one plus the second
+ // character we see (don't write the first character if it's 0x0000,
+ // and don't write the second character if it's 0x10FFFF).
+ while (i < chars.length) {
+ if (chars[i] != 0x0000) {
+ result[index++] = chars[i] - 1;
+ }
+ if (chars[i + 1] != 0x10FFFF) {
+ result[index++] = chars[i + 1] + 1;
+ }
+ i += 2;
+ }
+
+ // add 0x10ffff to the end of the result, unless it was in
+ // the original set
+ if (chars[i-1] != 0x10FFFF) {
+ result[index++] = 0x10FFFF;
+ }
+
+ if (result.length > index) {
+ int[] tmpbuf = new int[index];
+ System.arraycopy(result, 0, tmpbuf, 0, index);
+ return tmpbuf;
+ }
+
+ return result;
+ }
+
+ /**
+ * Returns true if this CharSet contains the specified character
+ * @param c The character we're testing for set membership
+ */
+ public boolean contains(int c) {
+ // search for the first range endpoint that is greater than or
+ // equal to c (i only ever visits the odd "end of range" slots)
+ int i = 1;
+ while (i < chars.length && chars[i] < c) {
+ i += 2;
+ }
+
+ // i is odd and chars.length is even, so walking off the end (or an
+ // empty set) leaves i past chars.length, never equal to it
+ if (i >= chars.length) {
+ return false;
+ }
+
+ // otherwise, we contain c if the beginning of the range is less
+ // than or equal to c
+ return chars[i - 1] <= c;
+ }
+
+ /**
+ * Returns true if "that" is another instance of CharSet containing
+ * the exact same characters as this one
+ */
+ public boolean equals(Object that) {
+ return (that instanceof CharSet) && Arrays.equals(chars, ((CharSet)that).chars);
+ }
+
+ /**
+ * Returns the hash code for this set of characters
+ */
+ public int hashCode() {
+ return Arrays.hashCode(chars);
+ }
+
+ /**
+ * Creates a new CharSet that is equal to this one
+ */
+ public Object clone() {
+ return new CharSet(chars);
+ }
+
+ /**
+ * Returns true if this CharSet contains no characters
+ */
+ public boolean empty() {
+ return chars.length == 0;
+ }
+
+ /**
+ * Returns a textual representation of this CharSet for diagnostics: each
+ * range is written as hex code points (0x41-0x5a). parseString() reads
+ * letters and digits literally, so this text does not round-trip through it.
+ */
+ public String toString() {
+ StringBuffer result = new StringBuffer();
+
+ // the result begins with an opening bracket
+ result.append('[');
+
+ // iterate through the ranges in the CharSet
+ for (int i = 0; i < chars.length; i += 2) {
+ // for a range with the same beginning and ending point,
+ // output that character
+ if (chars[i] == chars[i + 1]) {
+ result.append("0x");
+ result.append(Integer.toHexString(chars[i]));
+ }
+
+ // otherwise, output the start and end points of the range
+ // separated by a dash
+ else {
+ result.append("0x");
+ result.append(Integer.toHexString(chars[i]));
+ result.append("-0x");
+ result.append(Integer.toHexString(chars[i + 1]));
+ }
+ }
+
+ // the result ends with a closing bracket
+ result.append(']');
+ return result.toString();
+ }
+
+ /**
+ * Returns an integer array representing the contents of this CharSet
+ * in the same form in which they're stored internally: as pairs
+ * of characters representing the start and end points of ranges
+ */
+ public int[] getRanges() {
+ return chars;
+ }
+
+ /**
+ * Returns an Enumeration that will return the ranges of characters
+ * contained in this CharSet one at a time
+ */
+ public Enumeration getChars() {
+ return new Enumeration(this);
+ }
+
+ //==========================================================================
+ // CharSet.Enumeration
+ //==========================================================================
+
+ /**
+ * Walks the ranges of a CharSet one at a time; each element is a fresh
+ * two-element array holding the start and end of one range
+ */
+ public class Enumeration implements java.util.Enumeration<int[]> {
+ /**
+ * Starts at the first range of cs (shares cs's range array, no copy)
+ */
+ Enumeration(CharSet cs) {
+ this.chars = cs.chars;
+ p = 0;
+ }
+
+ /**
+ * Returns true if the enumeration hasn't yet returned
+ * all the ranges in the CharSet
+ */
+ public boolean hasMoreElements() {
+ return p < chars.length;
+ }
+
+ /**
+ * Returns the next range; throws NoSuchElementException when exhausted
+ */
+ public int[] nextElement() {
+ if (!hasMoreElements()) {
+ throw new java.util.NoSuchElementException();
+ }
+ return new int[] { chars[p++], chars[p++] };
+ }
+
+ int p;
+ int[] chars;
+ }
+}
--- old/make/jdk/src/classes/build/tools/generatebreakiteratordata/CharacterCategory.java 2020-03-23 19:56:39.091962684 +0100
+++ /dev/null 2020-02-11 10:29:13.086348146 +0100
@@ -1,697 +0,0 @@
-/*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation. Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/**
- * This is a tool to generate categoryNames and categoryMap which are used in
- * CharSet.java.
- */
-
-package build.tools.generatebreakiteratordata;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.util.StringTokenizer;
-
-class CharacterCategory {
-
- /**
- * A list of Unicode category names.
- */
- static final String[] categoryNames = {
- "Ll", /* Letter, Lowercase */
- "Lu", /* Letter, Uppercase */
- "Lt", /* Letter, Titlecase */
- "Lo", /* Letter, Other */
- "Lm", /* Letter, Modifier */
- "Nd", /* Number, Decimal Digit */
- "Nl", /* Number, Letter */
- "No", /* Number, Other */
- "Ps", /* Punctuation, Open */
- "Pe", /* Punctuation, Close */
- "Pi", /* Punctuation, Initial quote */
- "Pf", /* Punctuation, Final quote */
- "Pd", /* Punctuation, Dash */
- "Pc", /* Punctuation, Connector */
- "Po", /* Punctuation, Other */
- "Sc", /* Symbol, Currency */
- "Sm", /* Symbol, Math */
- "So", /* Symbol, Other */
- "Mn", /* Mark, Non-Spacing */
- "Mc", /* Mark, Spacing Combining */
- "Me", /* Mark, Enclosing */
- "Zl", /* Separator, Line */
- "Zp", /* Separator, Paragraph */
- "Zs", /* Separator, Space */
- "Cc", /* Other, Control */
- "Cf", /* Other, Format */
- "--", /* Dummy, ignored */
- // Don't add anything after the Dummy entry!!
- };
-
- /**
- * A array of Unicode code points for each category.
- */
- private static int[][] categoryMap;
-
-
- /**
- * Generates CategoryMap for GenerateBreakIteratorData.
- */
- static void makeCategoryMap(String filename) {
- /* Overwrite specfile name */
- specfile = filename;
-
- /* Generate data in current format (1.5.0) */
- generateNewData();
-
- /* Copy generated data to cateogyMap */
- categoryMap = new int[categoryNames.length-1][];
- for (int i = 0; i < categoryNames.length-1; i++) {
- int len = newListCount[BMP][i] + newListCount[nonBMP][i];
- categoryMap[i] = new int[len];
- System.arraycopy(newList[i], 0, categoryMap[i], 0, len);
- }
- }
-
- /**
- * Returns categoryMap for the given category.
- */
- static int[] getCategoryMap(int category) {
- return categoryMap[category];
- }
-
-
- /**
- * Only used for debugging and generating a test program.
- */
- public static void main(String[] args) {
- /* Parses command-line options */
- processArgs(args);
-
- /* Generates data in current format (1.5.0) */
- generateNewData();
-
- /*
- * Generates data in older format (1.4.X and earlier) and creates
- * the old CategoryMap if "oldFilename" is not null.
- */
- if (!oldDatafile.equals("")) {
- generateOldData();
- generateOldDatafile();
- }
-
- /* Displays summary of generated data */
- showSummary();
-
- /*
- * Generates a test program which compares the new data and the return
- * values of Character.getType().
- * and the old data and the new data.
- */
- generateTestProgram();
- }
-
-
- /**
- * Spec (Unicode data file)
- */
- private static String specfile = "UnicodeData.txt";
-
- /**
- * Output directory
- */
- private static String outputDir = "";
-
- /**
- * Old data filename
- */
- private static String oldDatafile = "";
-
- /**
- * Parses the specified arguments and sets up the variables.
- */
- private static void processArgs(String[] args) {
- for (int i = 0; i < args.length; i++) {
- String arg =args[i];
- if (arg.equals("-spec")) {
- specfile = args[++i];
- } else if (arg.equals("-old")) {
- oldDatafile = args[++i];
- } else if (arg.equals("-o")) {
- outputDir = args[++i];
- } else {
- System.err.println("Usage: java CharacterCategory [-spec specfile]");
- System.exit(1);
- }
- }
- }
-
-
- /**
- * Displays summary of generated data
- */
- private static void showSummary() {
- int oldSum = 0;
- int newSum = 0;
- int oldSuppSum = 0;
- int newSuppSum = 0;
-
- for (int i = 0; i < categoryNames.length-1; i++) {
- int newNum = newListCount[BMP][i] + newListCount[nonBMP][i];
-
- if (oldTotalCount[i] != newNum) {
- System.err.println("Error: The number of generated data is different between the new approach and the old approach.");
- }
- if (oldListCount[SURROGATE][i] != newListCount[nonBMP][i]) {
- System.err.println("Error: The number of generated supplementarycharacters is different between the new approach and the old approach.");
- }
-
- System.out.println(" " + categoryNames[i] + ": " +
- oldTotalCount[i] +
- "(" + oldListCount[BEFORE][i] +
- " + " + oldListCount[SURROGATE][i] +
- " + " + oldListCount[AFTER][i] + ")" +
- " --- " + newNum +
- "(" + newListCount[BMP][i] +
- " + " + newListCount[nonBMP][i] + ")");
-
- oldSum += oldListCount[BEFORE][i] * 2 +
- oldListCount[SURROGATE][i] * 4 +
- oldListCount[AFTER][i] * 2;
- newSum += newNum * 4 ;
- oldSuppSum += oldListCount[SURROGATE][i] * 4;
- newSuppSum += newListCount[nonBMP][i] * 4;
- }
-
- System.out.println("\nTotal buffer sizes are:\n " +
- oldSum + "bytes(Including " + oldSuppSum +
- "bytes for supplementary characters)\n " +
- newSum + "bytes(Including " + newSuppSum +
- "bytes for supplementary characters)");
-
- if (!ignoredOld.toString().equals(ignoredNew.toString())) {
- System.err.println("Ignored categories: Error: List mismatch: " +
- ignoredOld + " vs. " + ignoredNew);
- } else {
- System.out.println("\nIgnored categories: " + ignoredOld);
- System.out.println("Please confirm that they aren't used in BreakIteratorRules.");
- }
- }
-
-
- private static final int HighSurrogate_CodeUnit_Start = 0xD800;
- private static final int LowSurrogate_CodeUnit_Start = 0xDC00;
- private static final int Supplementary_CodePoint_Start = 0x10000;
-
-
- private static StringBuffer ignoredOld = new StringBuffer();
- private static int[] oldTotalCount = new int[categoryNames.length];
- private static int[][] oldListCount = new int[3][categoryNames.length];
- private static int[][] oldListLen = new int[3][categoryNames.length];
- private static StringBuffer[][] oldList = new StringBuffer[3][categoryNames.length];
-
- private static final int BEFORE = 0;
- private static final int SURROGATE = 1;
- private static final int AFTER = 2;
-
- /**
- * Makes CategoryMap in ordler format which had been used by JDK 1.4.X and
- * earlier versions.
- */
- private static void generateOldData() {
- /* Initialize arrays. */
- for (int i = 0; i")) {
- setFirst = false;
- } else {
- appendOldChar(prevIndex, prevCodeValue, prevCode);
- appendOldChar(index, curCodeValue, code);
- }
- }
- prevCodeValue = curCodeValue;
- prevCode = code;
- if (characterName.endsWith(" First>")) {
- setFirst = true;
- }
- } else {
- if (ignoredOld.indexOf(category) == -1) {
- ignoredOld.append(category);
- ignoredOld.append(' ');
- }
- }
- }
- appendOldChar(prevIndex, prevCodeValue, prevCode);
-
- bin.close();
- fin.close();
- }
- catch (Exception e) {
- throw new InternalError(e.toString());
- }
- }
-
- private static void appendOldChar(int index, int code, String s) {
- int range;
- if (code < HighSurrogate_CodeUnit_Start) {
- range = BEFORE;
- } else if (code < Supplementary_CodePoint_Start) {
- range = AFTER;
- } else {
- range = SURROGATE;
- }
-
- if (oldListLen[range][index] > 64) {
- oldList[range][index].append("\"\n + \"");
- oldListLen[range][index] = 19;
- }
-
- if (code == 0x22 || code == 0x5c) {
- oldList[range][index].append('\\');
- oldList[range][index].append((char)code);
- oldListLen[range][index] += 2;
- } else if (code > 0x20 && code < 0x7F) {
- oldList[range][index].append((char)code);
- oldListLen[range][index] ++;
- } else {
- if (range == SURROGATE) {// Need to convert code point to code unit
- oldList[range][index].append(toCodeUnit(code));
- oldListLen[range][index] += 12;
- } else {
- oldList[range][index].append("\\u");
- oldList[range][index].append(s);
- oldListLen[range][index] += 6;
- }
- }
- oldListCount[range][index] ++;
- oldTotalCount[index]++;
- }
-
- private static String toCodeUnit(int i) {
- StringBuffer sb = new StringBuffer();
- sb.append("\\u");
- sb.append(Integer.toString((i - Supplementary_CodePoint_Start) / 0x400 + HighSurrogate_CodeUnit_Start, 16).toUpperCase());
- sb.append("\\u");
- sb.append(Integer.toString(i % 0x400 + LowSurrogate_CodeUnit_Start, 16).toUpperCase());
- return sb.toString();
- }
-
- private static int toCodePoint(String s) {
- char c1 = s.charAt(0);
-
- if (s.length() == 1 || !Character.isHighSurrogate(c1)) {
- return (int)c1;
- } else {
- char c2 = s.charAt(1);
- if (s.length() != 2 || !Character.isLowSurrogate(c2)) {
- return -1;
- }
- return Character.toCodePoint(c1, c2);
- }
- }
-
-
- private static StringBuffer ignoredNew = new StringBuffer();
- private static int[] newTotalCount = new int[categoryNames.length];
- private static int[][] newListCount = new int[2][categoryNames.length];
- private static int[][] newList = new int[categoryNames.length][];
-
- private static final int BMP = 0;
- private static final int nonBMP = 1;
-
- /**
- * Makes CategoryMap in newer format which is used by JDK 1.5.0.
- */
- private static void generateNewData() {
- /* Initialize arrays. */
- for (int i = 0; i")) {
- setFirst = false;
- } else {
- System.err.println("*** Error 1 at " + code);
- }
- } else {
- if (characterName.endsWith(" First>")) {
- setFirst = true;
- } else if (characterName.endsWith(" Last>")) {
- System.err.println("*** Error 2 at " + code);
- } else {
- if (prevCodeValue != curCodeValue - 1) {
- appendNewChar(prevIndex, prevCodeValue);
- appendNewChar(index, curCodeValue);
- }
- }
- }
- } else {
- if (setFirst) {
- System.err.println("*** Error 3 at " + code);
- } else if (characterName.endsWith(" First>")) {
- setFirst = true;
- } else if (characterName.endsWith(" Last>")) {
- System.err.println("*** Error 4 at " + code);
- }
- appendNewChar(prevIndex, prevCodeValue);
- appendNewChar(index, curCodeValue);
- prevIndex = index;
- }
- prevCodeValue = curCodeValue;
- } else {
- if (ignoredNew.indexOf(category) == -1) {
- ignoredNew.append(category);
- ignoredNew.append(' ');
- }
- }
- }
- appendNewChar(prevIndex, prevCodeValue);
-
- bin.close();
- fin.close();
- }
- catch (Exception e) {
- System.err.println("Error occurred on accessing " + specfile);
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- private static void appendNewChar(int index, int code) {
- int bufLen = newList[index].length;
- if (newTotalCount[index] == bufLen) {
- int[] tmpBuf = new int[bufLen + 10];
- System.arraycopy(newList[index], 0, tmpBuf, 0, bufLen);
- newList[index] = tmpBuf;
- }
-
- newList[index][newTotalCount[index]++] = code;
- if (code < 0x10000) {
- newListCount[BMP][index]++;
- } else {
- newListCount[nonBMP][index]++;
- }
- }
-
-
- /* Generates the old CategoryMap. */
- private static void generateOldDatafile() {
- try {
- FileWriter fout = new FileWriter(oldDatafile);
- BufferedWriter bout = new BufferedWriter(fout);
-
- bout.write("\n //\n // The following String[][] can be used in CharSet.java as is.\n //\n\n private static final String[][] categoryMap = {\n");
- for (int i = 0; i < categoryNames.length - 1; i++) {
- if (oldTotalCount[i] != 0) {
- bout.write(" { \"" + categoryNames[i] + "\",");
-
- /* 0x0000-0xD7FF */
- if (oldListCount[BEFORE][i] != 0) {
- bout.write(" \"");
-
- bout.write(oldList[BEFORE][i].toString() + "\"\n");
- }
-
- /* 0xD800-0xFFFF */
- if (oldListCount[AFTER][i] != 0) {
- if (oldListCount[BEFORE][i] != 0) {
- bout.write(" + \"");
- } else {
- bout.write(" \"");
- }
- bout.write(oldList[AFTER][i].toString() + "\"\n");
- }
-
- /* 0xD800DC00(0x10000)-0xDBFF0xDFFFF(0x10FFFF) */
- if (oldListCount[SURROGATE][i] != 0) {
- if (oldListCount[BEFORE][i] != 0 || oldListCount[AFTER][i] != 0) {
- bout.write(" + \"");
- } else {
- bout.write(" \"");
- }
- bout.write(oldList[SURROGATE][i].toString() + "\"\n");
- }
- bout.write(" },\n");
-
- }
- }
- bout.write(" };\n\n");
- bout.close();
- fout.close();
- }
- catch (Exception e) {
- System.err.println("Error occurred on accessing " + oldDatafile);
- e.printStackTrace();
- System.exit(1);
- }
-
- System.out.println("\n" + oldDatafile + " has been generated.");
- }
-
-
- /**
- * Test program to be generated
- */
- private static final String outfile = "CharacterCategoryTest.java";
-
- /*
- * Generates a test program which compare the generated date (newer one)
- * with the return values of Characger.getType().
- */
- private static void generateTestProgram() {
- try {
- FileWriter fout = new FileWriter(outfile);
- BufferedWriter bout = new BufferedWriter(fout);
-
- bout.write(collationMethod);
- bout.write("\n //\n // The following arrays can be used in CharSet.java as is.\n //\n\n");
-
- bout.write(" private static final String[] categoryNames = {");
- for (int i = 0; i < categoryNames.length - 1; i++) {
- if (i % 10 == 0) {
- bout.write("\n ");
- }
- bout.write("\"" + categoryNames[i] + "\", ");
- }
- bout.write("\n };\n\n");
-
- bout.write(" private static final int[][] categoryMap = {\n");
-
- for (int i = 0; i < categoryNames.length - 1; i++) {
- StringBuffer sb = new StringBuffer(" { /* Data for \"" + categoryNames[i] + "\" category */");
-
- for (int j = 0; j < newTotalCount[i]; j++) {
- if (j % 8 == 0) {
- sb.append("\n ");
- }
- sb.append(" 0x");
- sb.append(Integer.toString(newList[i][j], 16).toUpperCase());
- sb.append(',');
- }
- sb.append("\n },\n");
- bout.write(sb.toString());
- }
-
- bout.write(" };\n");
-
- bout.write("\n}\n");
-
- bout.close();
- fout.close();
- }
- catch (Exception e) {
- System.err.println("Error occurred on accessing " + outfile);
- e.printStackTrace();
- System.exit(1);
- }
-
- System.out.println("\n" + outfile + " has been generated.");
- }
-
- static String collationMethod =
-"public class CharacterCategoryTest {\n\n" +
-" static final int SIZE = 0x110000;\n" +
-" static final String[] category = {\n" +
-" \"Cn\", \"Lu\", \"Ll\", \"Lt\", \"Lm\", \"Lo\", \"Mn\", \"Me\",\n" +
-" \"Mc\", \"Nd\", \"Nl\", \"No\", \"Zs\", \"Zl\", \"Zp\", \"Cc\",\n" +
-" \"Cf\", \"\", \"Co\", \"Cs\", \"Pd\", \"Ps\", \"Pe\", \"Pc\",\n" +
-" \"Po\", \"Sm\", \"Sc\", \"Sk\", \"So\", \"Pi\", \"Pf\"\n" +
-" };\n\n" +
-" public static void main(String[] args) {\n" +
-" boolean err = false;\n" +
-" byte[] b = new byte[SIZE];\n" +
-" for (int i = 0; i < SIZE; i++) {\n" +
-" b[i] = 0;\n" +
-" }\n" +
-" for (int i = 0; i < categoryMap.length; i++) {\n" +
-" byte categoryNum = 0;\n" +
-" String categoryName = categoryNames[i];\n" +
-" for (int j = 0; j < category.length; j++) {\n" +
-" if (categoryName.equals(category[j])) {\n" +
-" categoryNum = (byte)j;\n" +
-" break;\n" +
-" }\n" +
-" }\n" +
-" int[] values = categoryMap[i];\n" +
-" for (int j = 0; j < values.length;) {\n" +
-" int firstChar = values[j++];\n" +
-" int lastChar = values[j++];\n" +
-" for (int k = firstChar; k <= lastChar; k++) {\n" +
-" b[k] = categoryNum;\n" +
-" }\n" +
-" }\n" +
-" }\n" +
-" for (int i = 0; i < SIZE; i++) {\n" +
-" int characterType = Character.getType(i);\n" +
-" if (b[i] != characterType) {\n" +
-" /* Co, Cs and Sk categories are ignored in CharacterCategory. */\n" +
-" if (characterType == Character.PRIVATE_USE ||\n" +
-" characterType == Character.SURROGATE ||\n" +
-" characterType == Character.MODIFIER_SYMBOL) {\n" +
-" continue;\n" +
-" }\n" +
-" err = true;\n" +
-" System.err.println(\"Category conflict for a character(0x\" +\n" +
-" Integer.toHexString(i) +\n" +
-" \"). CharSet.categoryMap:\" +\n" +
-" category[b[i]] +\n" +
-" \" Character.getType():\" +\n" +
-" category[characterType]);\n" +
-" }\n" +
-" }\n\n" +
-" if (err) {\n" +
-" throw new RuntimeException(\"Conflict occurred between Charset.categoryMap and Character.getType()\");\n" +
-" }\n" +
-" }\n";
-
-}
--- /dev/null 2020-02-11 10:29:13.086348146 +0100
+++ new/src/java.base/share/tools/org/openjdk/buildtools/generatebreakiteratordata/CharacterCategory.java 2020-03-23 19:56:38.643962687 +0100
@@ -0,0 +1,697 @@
+/*
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * This is a tool to generate categoryNames and categoryMap which are used in
+ * CharSet.java.
+ */
+
+package org.openjdk.buildtools.generatebreakiteratordata;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.util.StringTokenizer;
+
+class CharacterCategory {
+
+ /**
+ * A list of Unicode category names.
+ */
+ static final String[] categoryNames = {
+ "Ll", /* Letter, Lowercase */
+ "Lu", /* Letter, Uppercase */
+ "Lt", /* Letter, Titlecase */
+ "Lo", /* Letter, Other */
+ "Lm", /* Letter, Modifier */
+ "Nd", /* Number, Decimal Digit */
+ "Nl", /* Number, Letter */
+ "No", /* Number, Other */
+ "Ps", /* Punctuation, Open */
+ "Pe", /* Punctuation, Close */
+ "Pi", /* Punctuation, Initial quote */
+ "Pf", /* Punctuation, Final quote */
+ "Pd", /* Punctuation, Dash */
+ "Pc", /* Punctuation, Connector */
+ "Po", /* Punctuation, Other */
+ "Sc", /* Symbol, Currency */
+ "Sm", /* Symbol, Math */
+ "So", /* Symbol, Other */
+ "Mn", /* Mark, Non-Spacing */
+ "Mc", /* Mark, Spacing Combining */
+ "Me", /* Mark, Enclosing */
+ "Zl", /* Separator, Line */
+ "Zp", /* Separator, Paragraph */
+ "Zs", /* Separator, Space */
+ "Cc", /* Other, Control */
+ "Cf", /* Other, Format */
+ "--", /* Dummy, ignored */
+ // Don't add anything after the Dummy entry!!
+ };
+
+ /**
+ * An array of Unicode code points for each category.
+ */
+ private static int[][] categoryMap;
+
+
+ /**
+ * Generates CategoryMap for GenerateBreakIteratorData.
+ */
+ static void makeCategoryMap(String filename) {
+ /* Overwrite specfile name */
+ specfile = filename;
+
+ /* Generate data in current format (1.5.0) */
+ generateNewData();
+
+ /* Copy generated data to cateogyMap */
+ categoryMap = new int[categoryNames.length-1][];
+ for (int i = 0; i < categoryNames.length-1; i++) {
+ int len = newListCount[BMP][i] + newListCount[nonBMP][i];
+ categoryMap[i] = new int[len];
+ System.arraycopy(newList[i], 0, categoryMap[i], 0, len);
+ }
+ }
+
+ /**
+ * Returns the code point data stored for one category.
+ *
+ * The returned array is the row held in categoryMap itself, not a copy.
+ * In the code visible here categoryMap is only assigned by
+ * makeCategoryMap(), so that method has to run first.
+ *
+ * @param category row index into categoryMap
+ * @return the int[] stored for that category
+ */
+ static int[] getCategoryMap(int category) {
+ return categoryMap[category];
+ }
+
+
+ /**
+ * Command-line entry point; only used for debugging and for generating a
+ * test program.
+ *
+ * Steps: parse the options, collect the new-format data, optionally
+ * collect and write the old-format data, print a summary, then write the
+ * test program.
+ *
+ * @param args command-line options, see processArgs()
+ */
+ public static void main(String[] args) {
+ /* Parses command-line options */
+ processArgs(args);
+
+ /* Generates data in current format (1.5.0) */
+ generateNewData();
+
+ /*
+ * Generates data in older format (1.4.X and earlier) and writes the
+ * old CategoryMap, but only if an old data file name was given
+ * (oldDatafile is not empty).
+ */
+ if (!oldDatafile.equals("")) {
+ generateOldData();
+ generateOldDatafile();
+ }
+
+ /*
+ * Displays summary of generated data.
+ * NOTE(review): showSummary() compares the old counters against the new
+ * ones unconditionally, so it looks like it reports mismatches whenever
+ * the old data was not generated above - confirm whether that is intended.
+ */
+ showSummary();
+
+ /*
+ * Generates a test program which compares the new data with the return
+ * values of Character.getType().
+ */
+ generateTestProgram();
+ }
+
+
+ /**
+ * Spec (Unicode data file)
+ */
+ private static String specfile = "UnicodeData.txt";
+
+ /**
+ * Output directory
+ */
+ private static String outputDir = "";
+
+ /**
+ * Old data filename
+ */
+ private static String oldDatafile = "";
+
+ /**
+  * Parses the command-line arguments and stores them in the static option
+  * fields.
+  *
+  * Recognized options, each followed by a value:
+  *   -spec  Unicode data file to read (stored in specfile)
+  *   -old   file name for the old-format CategoryMap (stored in oldDatafile)
+  *   -o     output directory (stored in outputDir)
+  *
+  * An unknown option, or an option given as the last argument without its
+  * value, prints the usage line and terminates the VM with exit status 1.
+  *
+  * @param args the arguments passed to main()
+  */
+ private static void processArgs(String[] args) {
+     for (int i = 0; i < args.length; i++) {
+         String arg = args[i];
+         boolean known = arg.equals("-spec") || arg.equals("-old") || arg.equals("-o");
+
+         // Every known option consumes the following argument, so it must
+         // exist. Reading args[++i] unchecked would fail with an
+         // ArrayIndexOutOfBoundsException for e.g. a trailing "-spec".
+         if (!known || i + 1 >= args.length) {
+             System.err.println("Usage: java CharacterCategory [-spec specfile] [-old oldDatafile] [-o outputDir]");
+             System.exit(1);
+             return; // not reached in practice; keeps the access below guarded
+         }
+
+         String value = args[++i];
+         if (arg.equals("-spec")) {
+             specfile = value;
+         } else if (arg.equals("-old")) {
+             oldDatafile = value;
+         } else {
+             outputDir = value;
+         }
+     }
+ }
+
+
+ /**
+ * Prints a per-category comparison of the old-format and new-format
+ * counters, followed by the estimated buffer sizes and the list of ignored
+ * categories.
+ *
+ * Mismatches between the two approaches are reported on System.err; the
+ * regular report goes to System.out. The dummy last entry of categoryNames
+ * is not reported.
+ */
+ private static void showSummary() {
+ int oldSum = 0;
+ int newSum = 0;
+ int oldSuppSum = 0;
+ int newSuppSum = 0;
+
+ for (int i = 0; i < categoryNames.length-1; i++) {
+ /* Number of new-format entries in this category, both planes. */
+ int newNum = newListCount[BMP][i] + newListCount[nonBMP][i];
+
+ /* Both approaches are expected to yield the same number of entries. */
+ if (oldTotalCount[i] != newNum) {
+ System.err.println("Error: The number of generated data is different between the new approach and the old approach.");
+ }
+ if (oldListCount[SURROGATE][i] != newListCount[nonBMP][i]) {
+ System.err.println("Error: The number of generated supplementarycharacters is different between the new approach and the old approach.");
+ }
+
+ /* One line per category: old total (per range) --- new total (per plane). */
+ System.out.println(" " + categoryNames[i] + ": " +
+ oldTotalCount[i] +
+ "(" + oldListCount[BEFORE][i] +
+ " + " + oldListCount[SURROGATE][i] +
+ " + " + oldListCount[AFTER][i] + ")" +
+ " --- " + newNum +
+ "(" + newListCount[BMP][i] +
+ " + " + newListCount[nonBMP][i] + ")");
+
+ /*
+ * Size estimate: an old entry is weighted 2 outside the SURROGATE
+ * range and 4 inside it; every new entry is weighted 4.
+ */
+ oldSum += oldListCount[BEFORE][i] * 2 +
+ oldListCount[SURROGATE][i] * 4 +
+ oldListCount[AFTER][i] * 2;
+ newSum += newNum * 4 ;
+ oldSuppSum += oldListCount[SURROGATE][i] * 4;
+ newSuppSum += newListCount[nonBMP][i] * 4;
+ }
+
+ System.out.println("\nTotal buffer sizes are:\n " +
+ oldSum + "bytes(Including " + oldSuppSum +
+ "bytes for supplementary characters)\n " +
+ newSum + "bytes(Including " + newSuppSum +
+ "bytes for supplementary characters)");
+
+ /* The two passes should have skipped the same categories. */
+ if (!ignoredOld.toString().equals(ignoredNew.toString())) {
+ System.err.println("Ignored categories: Error: List mismatch: " +
+ ignoredOld + " vs. " + ignoredNew);
+ } else {
+ System.out.println("\nIgnored categories: " + ignoredOld);
+ System.out.println("Please confirm that they aren't used in BreakIteratorRules.");
+ }
+ }
+
+
+ private static final int HighSurrogate_CodeUnit_Start = 0xD800;
+ private static final int LowSurrogate_CodeUnit_Start = 0xDC00;
+ private static final int Supplementary_CodePoint_Start = 0x10000;
+
+
+ private static StringBuffer ignoredOld = new StringBuffer();
+ private static int[] oldTotalCount = new int[categoryNames.length];
+ private static int[][] oldListCount = new int[3][categoryNames.length];
+ private static int[][] oldListLen = new int[3][categoryNames.length];
+ private static StringBuffer[][] oldList = new StringBuffer[3][categoryNames.length];
+
+ private static final int BEFORE = 0;
+ private static final int SURROGATE = 1;
+ private static final int AFTER = 2;
+
+ /**
+ * Makes CategoryMap in older format which had been used by JDK 1.4.X and
+ * earlier versions.
+ */
+ private static void generateOldData() {
+ /* Initialize arrays. */
+ for (int i = 0; i")) {
+ setFirst = false;
+ } else {
+ appendOldChar(prevIndex, prevCodeValue, prevCode);
+ appendOldChar(index, curCodeValue, code);
+ }
+ }
+ prevCodeValue = curCodeValue;
+ prevCode = code;
+ if (characterName.endsWith(" First>")) {
+ setFirst = true;
+ }
+ } else {
+ if (ignoredOld.indexOf(category) == -1) {
+ ignoredOld.append(category);
+ ignoredOld.append(' ');
+ }
+ }
+ }
+ appendOldChar(prevIndex, prevCodeValue, prevCode);
+
+ bin.close();
+ fin.close();
+ }
+ catch (Exception e) {
+ throw new InternalError(e.toString());
+ }
+ }
+
+ private static void appendOldChar(int index, int code, String s) {
+ int range;
+ if (code < HighSurrogate_CodeUnit_Start) {
+ range = BEFORE;
+ } else if (code < Supplementary_CodePoint_Start) {
+ range = AFTER;
+ } else {
+ range = SURROGATE;
+ }
+
+ if (oldListLen[range][index] > 64) {
+ oldList[range][index].append("\"\n + \"");
+ oldListLen[range][index] = 19;
+ }
+
+ if (code == 0x22 || code == 0x5c) {
+ oldList[range][index].append('\\');
+ oldList[range][index].append((char)code);
+ oldListLen[range][index] += 2;
+ } else if (code > 0x20 && code < 0x7F) {
+ oldList[range][index].append((char)code);
+ oldListLen[range][index] ++;
+ } else {
+ if (range == SURROGATE) {// Need to convert code point to code unit
+ oldList[range][index].append(toCodeUnit(code));
+ oldListLen[range][index] += 12;
+ } else {
+ oldList[range][index].append("\\u");
+ oldList[range][index].append(s);
+ oldListLen[range][index] += 6;
+ }
+ }
+ oldListCount[range][index] ++;
+ oldTotalCount[index]++;
+ }
+
+ private static String toCodeUnit(int i) {
+ StringBuffer sb = new StringBuffer();
+ sb.append("\\u");
+ sb.append(Integer.toString((i - Supplementary_CodePoint_Start) / 0x400 + HighSurrogate_CodeUnit_Start, 16).toUpperCase());
+ sb.append("\\u");
+ sb.append(Integer.toString(i % 0x400 + LowSurrogate_CodeUnit_Start, 16).toUpperCase());
+ return sb.toString();
+ }
+
+ private static int toCodePoint(String s) {
+ char c1 = s.charAt(0);
+
+ if (s.length() == 1 || !Character.isHighSurrogate(c1)) {
+ return (int)c1;
+ } else {
+ char c2 = s.charAt(1);
+ if (s.length() != 2 || !Character.isLowSurrogate(c2)) {
+ return -1;
+ }
+ return Character.toCodePoint(c1, c2);
+ }
+ }
+
+
+ private static StringBuffer ignoredNew = new StringBuffer();
+ private static int[] newTotalCount = new int[categoryNames.length];
+ private static int[][] newListCount = new int[2][categoryNames.length];
+ private static int[][] newList = new int[categoryNames.length][];
+
+ private static final int BMP = 0;
+ private static final int nonBMP = 1;
+
+ /**
+ * Makes CategoryMap in newer format which is used by JDK 1.5.0.
+ */
+ private static void generateNewData() {
+ /* Initialize arrays. */
+ for (int i = 0; i")) {
+ setFirst = false;
+ } else {
+ System.err.println("*** Error 1 at " + code);
+ }
+ } else {
+ if (characterName.endsWith(" First>")) {
+ setFirst = true;
+ } else if (characterName.endsWith(" Last>")) {
+ System.err.println("*** Error 2 at " + code);
+ } else {
+ if (prevCodeValue != curCodeValue - 1) {
+ appendNewChar(prevIndex, prevCodeValue);
+ appendNewChar(index, curCodeValue);
+ }
+ }
+ }
+ } else {
+ if (setFirst) {
+ System.err.println("*** Error 3 at " + code);
+ } else if (characterName.endsWith(" First>")) {
+ setFirst = true;
+ } else if (characterName.endsWith(" Last>")) {
+ System.err.println("*** Error 4 at " + code);
+ }
+ appendNewChar(prevIndex, prevCodeValue);
+ appendNewChar(index, curCodeValue);
+ prevIndex = index;
+ }
+ prevCodeValue = curCodeValue;
+ } else {
+ if (ignoredNew.indexOf(category) == -1) {
+ ignoredNew.append(category);
+ ignoredNew.append(' ');
+ }
+ }
+ }
+ appendNewChar(prevIndex, prevCodeValue);
+
+ bin.close();
+ fin.close();
+ }
+ catch (Exception e) {
+ System.err.println("Error occurred on accessing " + specfile);
+ e.printStackTrace();
+ System.exit(1);
+ }
+ }
+
+ private static void appendNewChar(int index, int code) {
+ int bufLen = newList[index].length;
+ if (newTotalCount[index] == bufLen) {
+ int[] tmpBuf = new int[bufLen + 10];
+ System.arraycopy(newList[index], 0, tmpBuf, 0, bufLen);
+ newList[index] = tmpBuf;
+ }
+
+ newList[index][newTotalCount[index]++] = code;
+ if (code < 0x10000) {
+ newListCount[BMP][index]++;
+ } else {
+ newListCount[nonBMP][index]++;
+ }
+ }
+
+
+ /**
+  * Writes the old-format CategoryMap (a String[][] initializer) to the file
+  * named by oldDatafile.
+  *
+  * Categories without any old-format data are left out, and the dummy last
+  * entry of categoryNames is never written. On any failure the stack trace
+  * is printed and the VM exits with status 1.
+  */
+ private static void generateOldDatafile() {
+     // try-with-resources closes the writers even when a write fails
+     // part-way; the explicit close() calls were skipped in that case.
+     try (FileWriter fout = new FileWriter(oldDatafile);
+          BufferedWriter bout = new BufferedWriter(fout)) {
+
+         bout.write("\n //\n // The following String[][] can be used in CharSet.java as is.\n //\n\n private static final String[][] categoryMap = {\n");
+         for (int i = 0; i < categoryNames.length - 1; i++) {
+             if (oldTotalCount[i] != 0) {
+                 bout.write(" { \"" + categoryNames[i] + "\",");
+
+                 /* 0x0000-0xD7FF */
+                 if (oldListCount[BEFORE][i] != 0) {
+                     bout.write(" \"");
+
+                     bout.write(oldList[BEFORE][i].toString() + "\"\n");
+                 }
+
+                 /* 0xD800-0xFFFF */
+                 if (oldListCount[AFTER][i] != 0) {
+                     /* Later segments are joined to the previous one with '+'. */
+                     if (oldListCount[BEFORE][i] != 0) {
+                         bout.write(" + \"");
+                     } else {
+                         bout.write(" \"");
+                     }
+                     bout.write(oldList[AFTER][i].toString() + "\"\n");
+                 }
+
+                 /* 0xD800DC00(0x10000)-0xDBFF0xDFFF(0x10FFFF) */
+                 if (oldListCount[SURROGATE][i] != 0) {
+                     if (oldListCount[BEFORE][i] != 0 || oldListCount[AFTER][i] != 0) {
+                         bout.write(" + \"");
+                     } else {
+                         bout.write(" \"");
+                     }
+                     bout.write(oldList[SURROGATE][i].toString() + "\"\n");
+                 }
+                 bout.write(" },\n");
+
+             }
+         }
+         bout.write(" };\n\n");
+     }
+     catch (Exception e) {
+         System.err.println("Error occurred on accessing " + oldDatafile);
+         e.printStackTrace();
+         System.exit(1);
+     }
+
+     System.out.println("\n" + oldDatafile + " has been generated.");
+ }
+
+
+ /**
+ * Test program to be generated
+ */
+ private static final String outfile = "CharacterCategoryTest.java";
+
+ /**
+  * Generates a test program which compares the generated data (the newer
+  * format) with the return values of Character.getType().
+  *
+  * The output file named by outfile receives, in this order: the text of
+  * collationMethod, the categoryNames array, the categoryMap array filled
+  * from newList, and the closing brace of the class. The dummy last entry
+  * of categoryNames is not written. On any failure the stack trace is
+  * printed and the VM exits with status 1.
+  */
+ private static void generateTestProgram() {
+     // try-with-resources closes the writers even when a write fails
+     // part-way; the explicit close() calls were skipped in that case.
+     try (FileWriter fout = new FileWriter(outfile);
+          BufferedWriter bout = new BufferedWriter(fout)) {
+
+         bout.write(collationMethod);
+         bout.write("\n //\n // The following arrays can be used in CharSet.java as is.\n //\n\n");
+
+         /* Category names, ten per line. */
+         bout.write(" private static final String[] categoryNames = {");
+         for (int i = 0; i < categoryNames.length - 1; i++) {
+             if (i % 10 == 0) {
+                 bout.write("\n ");
+             }
+             bout.write("\"" + categoryNames[i] + "\", ");
+         }
+         bout.write("\n };\n\n");
+
+         bout.write(" private static final int[][] categoryMap = {\n");
+
+         for (int i = 0; i < categoryNames.length - 1; i++) {
+             StringBuffer sb = new StringBuffer(" { /* Data for \"" + categoryNames[i] + "\" category */");
+
+             /* Values as upper-case hex, eight per line. */
+             for (int j = 0; j < newTotalCount[i]; j++) {
+                 if (j % 8 == 0) {
+                     sb.append("\n ");
+                 }
+                 sb.append(" 0x");
+                 sb.append(Integer.toString(newList[i][j], 16).toUpperCase());
+                 sb.append(',');
+             }
+             sb.append("\n },\n");
+             bout.write(sb.toString());
+         }
+
+         bout.write(" };\n");
+
+         /* Closes the class opened by collationMethod. */
+         bout.write("\n}\n");
+     }
+     catch (Exception e) {
+         System.err.println("Error occurred on accessing " + outfile);
+         e.printStackTrace();
+         System.exit(1);
+     }
+
+     System.out.println("\n" + outfile + " has been generated.");
+ }
+
+ static String collationMethod =
+"public class CharacterCategoryTest {\n\n" +
+" static final int SIZE = 0x110000;\n" +
+" static final String[] category = {\n" +
+" \"Cn\", \"Lu\", \"Ll\", \"Lt\", \"Lm\", \"Lo\", \"Mn\", \"Me\",\n" +
+" \"Mc\", \"Nd\", \"Nl\", \"No\", \"Zs\", \"Zl\", \"Zp\", \"Cc\",\n" +
+" \"Cf\", \"\", \"Co\", \"Cs\", \"Pd\", \"Ps\", \"Pe\", \"Pc\",\n" +
+" \"Po\", \"Sm\", \"Sc\", \"Sk\", \"So\", \"Pi\", \"Pf\"\n" +
+" };\n\n" +
+" public static void main(String[] args) {\n" +
+" boolean err = false;\n" +
+" byte[] b = new byte[SIZE];\n" +
+" for (int i = 0; i < SIZE; i++) {\n" +
+" b[i] = 0;\n" +
+" }\n" +
+" for (int i = 0; i < categoryMap.length; i++) {\n" +
+" byte categoryNum = 0;\n" +
+" String categoryName = categoryNames[i];\n" +
+" for (int j = 0; j < category.length; j++) {\n" +
+" if (categoryName.equals(category[j])) {\n" +
+" categoryNum = (byte)j;\n" +
+" break;\n" +
+" }\n" +
+" }\n" +
+" int[] values = categoryMap[i];\n" +
+" for (int j = 0; j < values.length;) {\n" +
+" int firstChar = values[j++];\n" +
+" int lastChar = values[j++];\n" +
+" for (int k = firstChar; k <= lastChar; k++) {\n" +
+" b[k] = categoryNum;\n" +
+" }\n" +
+" }\n" +
+" }\n" +
+" for (int i = 0; i < SIZE; i++) {\n" +
+" int characterType = Character.getType(i);\n" +
+" if (b[i] != characterType) {\n" +
+" /* Co, Cs and Sk categories are ignored in CharacterCategory. */\n" +
+" if (characterType == Character.PRIVATE_USE ||\n" +
+" characterType == Character.SURROGATE ||\n" +
+" characterType == Character.MODIFIER_SYMBOL) {\n" +
+" continue;\n" +
+" }\n" +
+" err = true;\n" +
+" System.err.println(\"Category conflict for a character(0x\" +\n" +
+" Integer.toHexString(i) +\n" +
+" \"). CharSet.categoryMap:\" +\n" +
+" category[b[i]] +\n" +
+" \" Character.getType():\" +\n" +
+" category[characterType]);\n" +
+" }\n" +
+" }\n\n" +
+" if (err) {\n" +
+" throw new RuntimeException(\"Conflict occurred between Charset.categoryMap and Character.getType()\");\n" +
+" }\n" +
+" }\n";
+
+}
--- old/make/jdk/src/classes/build/tools/generatebreakiteratordata/DictionaryBasedBreakIteratorBuilder.java 2020-03-23 19:56:39.895962678 +0100
+++ /dev/null 2020-02-11 10:29:13.086348146 +0100
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation. Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package build.tools.generatebreakiteratordata;
-
-import java.util.Hashtable;
-import java.util.Vector;
-
-/**
- * The Builder class for DictionaryBasedBreakIterator inherits almost all of
- * its functionality from RuleBasedBreakIteratorBuilder, but extends it with
- * extra logic to handle the "" token.
- */
-class DictionaryBasedBreakIteratorBuilder extends RuleBasedBreakIteratorBuilder {
-
- /**
- * A list of flags indicating which character categories are contained in
- * the dictionary file (this is used to determine which ranges of characters
- * to apply the dictionary to)
- */
- private boolean[] categoryFlags;
-
- /**
- * A CharSet that contains all the characters represented in the dictionary
- */
- private CharSet dictionaryChars = new CharSet();
- private String dictionaryExpression = "";
-
- public DictionaryBasedBreakIteratorBuilder(String description) {
- super(description);
- }
-
- /**
- * We override handleSpecialSubstitution() to add logic to handle
- * the tag. If we see a substitution named "",
- * parse the substitution expression and store the result in
- * dictionaryChars.
- */
- protected void handleSpecialSubstitution(String replace, String replaceWith,
- int startPos, String description) {
- super.handleSpecialSubstitution(replace, replaceWith, startPos, description);
-
- if (replace.equals("")) {
- if (replaceWith.charAt(0) == '(') {
- error("Dictionary group can't be enclosed in (", startPos, description);
- }
- dictionaryExpression = replaceWith;
- dictionaryChars = CharSet.parseString(replaceWith);
- }
- }
-
- /**
- * The other half of the logic to handle the dictionary characters happens
- * here. After the inherited builder has derived the real character
- * categories, we set up the categoryFlags array in the iterator. This array
- * contains "true" for every character category that includes a dictionary
- * character.
- */
- protected void buildCharCategories(Vector tempRuleList) {
- super.buildCharCategories(tempRuleList);
-
- categoryFlags = new boolean[categories.size()];
- for (int i = 0; i < categories.size(); i++) {
- CharSet cs = categories.elementAt(i);
- if (!(cs.intersection(dictionaryChars).empty())) {
- categoryFlags[i] = true;
- }
- }
- }
-
- // This function is actually called by
- // RuleBasedBreakIteratorBuilder.buildCharCategories(), which is called by
- // the function above. This gives us a way to create a separate character
- // category for the dictionary characters even when
- // RuleBasedBreakIteratorBuilder isn't making a distinction.
- protected void mungeExpressionList(Hashtable expressions) {
- expressions.put(dictionaryExpression, dictionaryChars);
- }
-
- void makeFile(String filename) {
- super.setAdditionalData(super.toByteArray(categoryFlags));
- super.makeFile(filename);
- }
-}
--- /dev/null 2020-02-11 10:29:13.086348146 +0100
+++ new/src/java.base/share/tools/org/openjdk/buildtools/generatebreakiteratordata/DictionaryBasedBreakIteratorBuilder.java 2020-03-23 19:56:39.503962680 +0100
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package org.openjdk.buildtools.generatebreakiteratordata;
+
+import java.util.Hashtable;
+import java.util.Vector;
+
+/**
+ * The Builder class for DictionaryBasedBreakIterator inherits almost all of
+ * its functionality from RuleBasedBreakIteratorBuilder, but extends it with
+ * extra logic to handle the {@code <dictionary>} token.
+ *
+ * NOTE(review): the raw Vector/Hashtable parameter types below look like
+ * they lost their type arguments; confirm against the superclass signatures.
+ */
+class DictionaryBasedBreakIteratorBuilder extends RuleBasedBreakIteratorBuilder {
+
+    /**
+     * A list of flags indicating which character categories are contained in
+     * the dictionary file (this is used to determine which ranges of characters
+     * to apply the dictionary to)
+     */
+    private boolean[] categoryFlags;
+
+    /**
+     * A CharSet that contains all the characters represented in the dictionary
+     */
+    private CharSet dictionaryChars = new CharSet();
+
+    /**
+     * The substitution text the dictionary CharSet was parsed from; empty
+     * until a {@code <dictionary>} substitution has been seen.
+     */
+    private String dictionaryExpression = "";
+
+    /**
+     * Creates a builder for the given rule description, which is passed on
+     * unchanged to the superclass constructor.
+     *
+     * @param description the break iterator rule description
+     */
+    public DictionaryBasedBreakIteratorBuilder(String description) {
+        super(description);
+    }
+
+    /**
+     * We override handleSpecialSubstitution() to add logic to handle
+     * the {@code <dictionary>} tag. If we see a substitution named
+     * "&lt;dictionary&gt;", parse the substitution expression and store the
+     * result in dictionaryChars.
+     *
+     * @param replace     name of the substitution
+     * @param replaceWith text the substitution expands to
+     * @param startPos    position handed to error() for reporting
+     * @param description the description being processed, handed to error()
+     */
+    protected void handleSpecialSubstitution(String replace, String replaceWith,
+                                             int startPos, String description) {
+        super.handleSpecialSubstitution(replace, replaceWith, startPos, description);
+
+        // The name must be compared with the literal token; comparing with
+        // the empty string would never match a real substitution.
+        if (replace.equals("<dictionary>")) {
+            if (replaceWith.charAt(0) == '(') {
+                error("Dictionary group can't be enclosed in (", startPos, description);
+            }
+            dictionaryExpression = replaceWith;
+            dictionaryChars = CharSet.parseString(replaceWith);
+        }
+    }
+
+    /**
+     * The other half of the logic to handle the dictionary characters happens
+     * here. After the inherited builder has derived the real character
+     * categories, we set up the categoryFlags array in the iterator. This array
+     * contains "true" for every character category that includes a dictionary
+     * character.
+     *
+     * @param tempRuleList rule list, passed through to the superclass
+     */
+    protected void buildCharCategories(Vector tempRuleList) {
+        super.buildCharCategories(tempRuleList);
+
+        // categories is not declared in this class; presumably it is the
+        // list populated by the superclass call above.
+        categoryFlags = new boolean[categories.size()];
+        for (int i = 0; i < categories.size(); i++) {
+            CharSet cs = categories.elementAt(i);
+            if (!(cs.intersection(dictionaryChars).empty())) {
+                categoryFlags[i] = true;
+            }
+        }
+    }
+
+    // This function is actually called by
+    // RuleBasedBreakIteratorBuilder.buildCharCategories(), which is called by
+    // the function above. This gives us a way to create a separate character
+    // category for the dictionary characters even when
+    // RuleBasedBreakIteratorBuilder isn't making a distinction.
+    protected void mungeExpressionList(Hashtable expressions) {
+        expressions.put(dictionaryExpression, dictionaryChars);
+    }
+
+    /**
+     * Registers the category flags, converted with toByteArray(), as
+     * additional data and then lets the superclass write the file.
+     *
+     * @param filename name of the data file to create
+     */
+    void makeFile(String filename) {
+        super.setAdditionalData(super.toByteArray(categoryFlags));
+        super.makeFile(filename);
+    }
+}
--- old/make/jdk/src/classes/build/tools/generatebreakiteratordata/GenerateBreakIteratorData.java 2020-03-23 19:56:40.739962671 +0100
+++ /dev/null 2020-02-11 10:29:13.086348146 +0100
@@ -1,207 +0,0 @@
-/*
- * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation. Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package build.tools.generatebreakiteratordata;
-
-import java.util.Enumeration;
-import java.util.ListResourceBundle;
-import java.util.Locale;
-import java.util.ResourceBundle;
-
-/**
- * Generates datafile for BreakIterator.
- */
-public class GenerateBreakIteratorData {
-
- /**
- * Directory where generated data files are put in.
- */
- private static String outputDir = "" ;
-
- /**
- * Unicode data file
- */
- private static String unicodeData = "UnicodeData.txt";
-
- /**
- * Locale data
- */
- private static String language = "";
- private static String country = "";
- private static String valiant = "";
- private static String localeName = ""; /* _language_country_valiant */
-
-
- public static void main(String[] args) {
- /* Parse command-line options */
- processArgs(args);
-
- /* Make categoryMap from Unicode data */
- CharacterCategory.makeCategoryMap(unicodeData);
-
- /* Generate files */
- try {
- generateFiles();
- } catch (Exception e) {
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- private static String localizedBundleName(String pkg, String clazz) {
- if (language.length() > 0) {
- return pkg + ".ext." + clazz + '_' + language;
- } else {
- return pkg + '.' + clazz;
- }
- }
-
- /**
- * Generate data files whose names are included in
- * sun.text.resources.BreakIteratorInfo+
- */
- private static void generateFiles() throws Exception {
- String[] classNames;
- ResourceBundle rules, info;
-
- info = (ResourceBundle) Class.forName(
- localizedBundleName("sun.text.resources", "BreakIteratorInfo")).getDeclaredConstructor().newInstance();
-
- classNames = info.getStringArray("BreakIteratorClasses");
-
- rules = (ResourceBundle) Class.forName(
- localizedBundleName("sun.text.resources", "BreakIteratorRules")).getDeclaredConstructor().newInstance();
-
- if (info.containsKey("CharacterData")) {
- generateDataFile(info.getString("CharacterData"),
- rules.getString("CharacterBreakRules"),
- classNames[0]);
- }
- if (info.containsKey("WordData")) {
- generateDataFile(info.getString("WordData"),
- rules.getString("WordBreakRules"),
- classNames[1]);
- }
- if (info.containsKey("LineData")) {
- generateDataFile(info.getString("LineData"),
- rules.getString("LineBreakRules"),
- classNames[2]);
- }
- if (info.containsKey("SentenceData")) {
- generateDataFile(info.getString("SentenceData"),
- rules.getString("SentenceBreakRules"),
- classNames[3]);
- }
- }
-
- /**
- * Generate a data file for break-iterator
- */
- private static void generateDataFile(String datafile, String rule, String builder) {
- RuleBasedBreakIteratorBuilder bld;
- if (builder.equals("RuleBasedBreakIterator")) {
- bld = new RuleBasedBreakIteratorBuilder(rule);
- } else if (builder.equals("DictionaryBasedBreakIterator")) {
- bld = new DictionaryBasedBreakIteratorBuilder(rule);
- } else {
- throw new IllegalArgumentException("Invalid break iterator class \"" + builder + "\"");
- }
-
- bld.makeFile(datafile);
- }
-
- /**
- * Parses the specified arguments and sets up the variables.
- */
- private static void processArgs(String[] args) {
- for (int i = 0; i < args.length; i++) {
- String arg = args[i];
- if (arg.equals("-o")) {
- outputDir = args[++i];
- } else if (arg.equals("-spec")) {
- unicodeData = args[++i];
- } else if (arg.equals("-language")) {
- language = args[++i];
- } else if (arg.equals("-country")) {
- country = args[++i];
- } else if (arg.equals("-valiant")) {
- valiant = args[++i];
- } else {
- usage();
- }
- }
-
- // Set locale name
- localeName = getLocaleName();
- }
-
- /**
- * Make locale name ("_language_country_valiant")
- */
- private static String getLocaleName() {
- if (language.equals("")) {
- if (!country.equals("") || !valiant.equals("")) {
- language = "en";
- } else {
- return "";
- }
- }
-
- StringBuffer sb = new StringBuffer();
- sb.append('_');
- sb.append(language);
- if (!country.equals("") || !valiant.equals("")) {
- sb.append('_');
- sb.append(country);
- if (!valiant.equals("")) {
- sb.append('_');
- sb.append(valiant);
- }
- }
-
- return sb.toString();
- }
-
- /**
- * Usage: Displayed when an invalid command-line option is specified.
- */
- private static void usage() {
- System.err.println("Usage: GenerateBreakIteratorData [options]\n" +
- " -o outputDir output directory name\n" +
- " -spec specname unicode text filename\n" +
- " and locale data:\n" +
- " -lang language target language name\n" +
- " -country country target country name\n" +
- " -valiant valiant target valiant name\n"
- );
- }
-
- /**
- * Return the path of output directory
- */
- static String getOutputDirectory() {
- return outputDir;
- }
-}
--- /dev/null 2020-02-11 10:29:13.086348146 +0100
+++ new/src/java.base/share/tools/org/openjdk/buildtools/generatebreakiteratordata/GenerateBreakIteratorData.java 2020-03-23 19:56:40.303962675 +0100
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package org.openjdk.buildtools.generatebreakiteratordata;
+
+import java.util.Enumeration;
+import java.util.ListResourceBundle;
+import java.util.Locale;
+import java.util.ResourceBundle;
+
+/**
+ * Generates the data files for BreakIterator.
+ */
+public class GenerateBreakIteratorData {
+
+    /**
+     * Directory where generated data files are put in (set by -o).
+     */
+    private static String outputDir = "";
+
+    /**
+     * Unicode data file (set by -spec)
+     */
+    private static String unicodeData = "UnicodeData.txt";
+
+    /**
+     * Locale data (set by -language, -country and -valiant)
+     */
+    private static String language = "";
+    private static String country = "";
+    private static String valiant = "";
+    private static String localeName = "";  /* _language_country_valiant */
+
+
+    public static void main(String[] args) {
+        /* Parse command-line options */
+        processArgs(args);
+
+        /* Make categoryMap from Unicode data */
+        CharacterCategory.makeCategoryMap(unicodeData);
+
+        /* Generate files; any failure is reported and turned into exit status 1 */
+        try {
+            generateFiles();
+        } catch (Exception e) {
+            e.printStackTrace();
+            System.exit(1);
+        }
+    }
+
+    private static String localizedBundleName(String pkg, String clazz) {
+        if (language.length() > 0) {
+            return pkg + ".ext." + clazz + '_' + language;
+        } else {
+            return pkg + '.' + clazz;
+        }
+    }
+
+    /**
+     * Generate data files whose names are listed in the BreakIteratorInfo bundle
+     * (sun.text.resources, or sun.text.resources.ext with a _language suffix).
+     */
+    private static void generateFiles() throws Exception {
+        String[] classNames;
+        ResourceBundle rules, info;
+
+        info = (ResourceBundle) Class.forName(
+            localizedBundleName("sun.text.resources", "BreakIteratorInfo")).getDeclaredConstructor().newInstance();
+
+        classNames = info.getStringArray("BreakIteratorClasses");
+
+        rules = (ResourceBundle) Class.forName(
+            localizedBundleName("sun.text.resources", "BreakIteratorRules")).getDeclaredConstructor().newInstance();
+
+        if (info.containsKey("CharacterData")) {
+            generateDataFile(info.getString("CharacterData"),
+                             rules.getString("CharacterBreakRules"),
+                             classNames[0]);
+        }
+        if (info.containsKey("WordData")) {
+            generateDataFile(info.getString("WordData"),
+                             rules.getString("WordBreakRules"),
+                             classNames[1]);
+        }
+        if (info.containsKey("LineData")) {
+            generateDataFile(info.getString("LineData"),
+                             rules.getString("LineBreakRules"),
+                             classNames[2]);
+        }
+        if (info.containsKey("SentenceData")) {
+            generateDataFile(info.getString("SentenceData"),
+                             rules.getString("SentenceBreakRules"),
+                             classNames[3]);
+        }
+    }
+
+    /**
+     * Generate a data file for break-iterator, using the builder class named by "builder"
+     */
+    private static void generateDataFile(String datafile, String rule, String builder) {
+        RuleBasedBreakIteratorBuilder bld;
+        if (builder.equals("RuleBasedBreakIterator")) {
+            bld = new RuleBasedBreakIteratorBuilder(rule);
+        } else if (builder.equals("DictionaryBasedBreakIterator")) {
+            bld = new DictionaryBasedBreakIteratorBuilder(rule);
+        } else {
+            throw new IllegalArgumentException("Invalid break iterator class \"" + builder + "\"");
+        }
+
+        bld.makeFile(datafile);
+    }
+
+    /**
+     * Parses the specified arguments and sets up the variables.
+     */
+    private static void processArgs(String[] args) {
+        for (int i = 0; i < args.length; i++) {
+            String arg = args[i];  // each recognized option consumes the next argument as its value
+            if (arg.equals("-o")) {
+                outputDir = args[++i];
+            } else if (arg.equals("-spec")) {
+                unicodeData = args[++i];
+            } else if (arg.equals("-language")) {
+                language = args[++i];
+            } else if (arg.equals("-country")) {
+                country = args[++i];
+            } else if (arg.equals("-valiant")) {
+                valiant = args[++i];
+            } else {
+                usage();
+            }
+        }
+
+        // Set locale name
+        localeName = getLocaleName();
+    }
+
+    /**
+     * Make locale name ("_language_country_valiant")
+     */
+    private static String getLocaleName() {
+        if (language.equals("")) {
+            if (!country.equals("") || !valiant.equals("")) {
+                language = "en";  // side effect: the later bundle lookup then uses language "en"
+            } else {
+                return "";
+            }
+        }
+
+        StringBuilder sb = new StringBuilder();
+        sb.append('_');
+        sb.append(language);
+        if (!country.equals("") || !valiant.equals("")) {
+            sb.append('_');
+            sb.append(country);
+            if (!valiant.equals("")) {
+                sb.append('_');
+                sb.append(valiant);
+            }
+        }
+
+        return sb.toString();
+    }
+
+    /**
+     * Usage: Printed to System.err when an invalid command-line option is specified; does not exit.
+     */
+    private static void usage() {
+        System.err.println("Usage: GenerateBreakIteratorData [options]\n" +
+        " -o outputDir output directory name\n" +
+        " -spec specname unicode text filename\n" +
+        " and locale data:\n" +
+        " -language language target language name\n" +
+        " -country country target country name\n" +
+        " -valiant valiant target valiant name\n"
+        );
+    }
+
+    /**
+     * Return the path of output directory
+     */
+    static String getOutputDirectory() {
+        return outputDir;
+    }
+}
--- old/make/jdk/src/classes/build/tools/generatebreakiteratordata/RuleBasedBreakIteratorBuilder.java 2020-03-23 19:56:41.539962665 +0100
+++ /dev/null 2020-02-11 10:29:13.086348146 +0100
@@ -1,2198 +0,0 @@
-/*
- * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation. Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package build.tools.generatebreakiteratordata;
-
-import java.io.*;
-import java.util.Enumeration;
-import java.util.Hashtable;
-import java.util.Stack;
-import java.util.Vector;
-import java.util.zip.CRC32;
-import sun.text.CompactByteArray;
-
-/**
- * This class has the job of constructing a RuleBasedBreakIterator from a
- * textual description. A Builder is constructed by GenerateBreakIteratorData,
- * which uses it to construct the iterator itself and then throws it away.
- * <p>The construction logic is separated out into its own class for two primary
- * reasons:
- * <ul>
- * <li>The construction logic is quite sophisticated and large. Separating
- * it out into its own class means the code must only be loaded into memory
- * while a RuleBasedBreakIterator is being constructed, and can be purged after
- * that.
- * <li>There is a fair amount of state that must be maintained throughout the
- * construction process that is not needed by the iterator after construction.
- * Separating this state out into another class prevents all of the functions
- * that construct the iterator from having to have really long parameter lists,
- * (hopefully) contributing to readability and maintainability.
- * </ul>
- * <p>
- * It'd be really nice if this could be an independent class rather than an
- * inner class, because that would shorten the source file considerably, but
- * making Builder an inner class of RuleBasedBreakIterator allows it direct
- * access to RuleBasedBreakIterator's private members, which saves us from
- * having to provide some kind of "back door" to the Builder class that could
- * then also be used by other classes.
- */
-class RuleBasedBreakIteratorBuilder {
-
- /**
- * A token used as a character-category value to identify ignore characters
- */
- protected static final byte IGNORE = -1;
-
- /**
- * Tables that indexes from character values to character category numbers
- */
- private CompactByteArray charCategoryTable = null;
- private SupplementaryCharacterData supplementaryCharCategoryTable = null;
-
- /**
- * The table of state transitions used for forward iteration
- */
- private short[] stateTable = null;
-
- /**
- * The table of state transitions used to sync up the iterator with the
- * text in backwards and random-access iteration
- */
- private short[] backwardsStateTable = null;
-
- /**
- * A list of flags indicating which states in the state table are accepting
- * ("end") states
- */
- private boolean[] endStates = null;
-
- /**
- * A list of flags indicating which states in the state table are
- * lookahead states (states which turn lookahead on and off)
- */
- private boolean[] lookaheadStates = null;
-
- /**
- * A table for additional data. May be used by a subclass of
- * RuleBasedBreakIterator.
- */
- private byte[] additionalData = null;
-
- /**
- * The number of character categories (and, thus, the number of columns in
- * the state tables)
- */
- private int numCategories;
-
- /**
- * A temporary holding place used for calculating the character categories.
- * This object contains CharSet objects.
- */
- protected Vector categories = null;
-
- /**
- * A table used to map parts of regexp text to lists of character
- * categories, rather than having to figure them out from scratch each time
- */
- protected Hashtable expressions = null;
-
- /**
- * A temporary holding place for the list of ignore characters
- */
- protected CharSet ignoreChars = null;
-
- /**
- * A temporary holding place where the forward state table is built
- */
- protected Vector tempStateTable = null;
-
- /**
- * A list of all the states that have to be filled in with transitions to
- * the next state that is created. Used when building the state table from
- * the regular expressions.
- */
- protected Vector decisionPointList = null;
-
- /**
- * A stack for holding decision point lists. This is used to handle nested
- * parentheses and braces in regexps.
- */
- protected Stack> decisionPointStack = null;
-
- /**
- * A list of states that loop back on themselves. Used to handle .*?
- */
- protected Vector loopingStates = null;
-
- /**
- * Looping states actually have to be backfilled later in the process
- * than everything else. This is where a the list of states to backfill
- * is accumulated. This is also used to handle .*?
- */
- protected Vector statesToBackfill = null;
-
- /**
- * A list mapping pairs of state numbers for states that are to be combined
- * to the state number of the state representing their combination. Used
- * in the process of making the state table deterministic to prevent
- * infinite recursion.
- */
- protected Vector mergeList = null;
-
- /**
- * A flag that is used to indicate when the list of looping states can
- * be reset.
- */
- protected boolean clearLoopingStates = false;
-
- /**
- * A bit mask used to indicate a bit in the table's flags column that marks
- * a state as an accepting state.
- */
- protected static final int END_STATE_FLAG = 0x8000;
-
- /**
- * A bit mask used to indicate a bit in the table's flags column that marks
- * a state as one the builder shouldn't loop to any looping states
- */
- protected static final int DONT_LOOP_FLAG = 0x4000;
-
- /**
- * A bit mask used to indicate a bit in the table's flags column that marks
- * a state as a lookahead state.
- */
- protected static final int LOOKAHEAD_STATE_FLAG = 0x2000;
-
- /**
- * A bit mask representing the union of the mask values listed above.
- * Used for clearing or masking off the flag bits.
- */
- protected static final int ALL_FLAGS = END_STATE_FLAG
- | LOOKAHEAD_STATE_FLAG
- | DONT_LOOP_FLAG;
-
- /**
- * This is the main function for setting up the BreakIterator's tables. It
- * just vectors different parts of the job off to other functions.
- */
- public RuleBasedBreakIteratorBuilder(String description) {
- Vector tempRuleList = buildRuleList(description);
- buildCharCategories(tempRuleList);
- buildStateTable(tempRuleList);
- buildBackwardsStateTable(tempRuleList);
- }
-
- /**
- * Thus function has three main purposes:
- * <ul><li>Perform general syntax checking on the description, so the rest
- * of the build code can assume that it's parsing a legal description.
- * <li>Split the description into separate rules
- * <li>Perform variable-name substitutions (so that no one else sees
- * variable names)
- * </ul>
- */
- private Vector buildRuleList(String description) {
- // invariants:
- // - parentheses must be balanced: ()[]{}<>
- // - nothing can be nested inside <>
- // - nothing can be nested inside [] except more []s
- // - pairs of ()[]{}<> must not be empty
- // - ; can only occur at the outer level
- // - | can only appear inside ()
- // - only one = or / can occur in a single rule
- // - = and / cannot both occur in the same rule
- // - <> can only occur on the left side of a = expression
- // (because we'll perform substitutions to eliminate them other places)
- // - the left-hand side of a = expression can only be a single character
- // (possibly with \) or text inside <>
- // - the right-hand side of a = expression must be enclosed in [] or ()
- // - * may not occur at the beginning of a rule, nor may it follow
- // =, /, (, (, |, }, ;, or *
- // - ? may only follow *
- // - the rule list must contain at least one / rule
- // - no rule may be empty
- // - all printing characters in the ASCII range except letters and digits
- // are reserved and must be preceded by \
- // - ! may only occur at the beginning of a rule
-
- // set up a vector to contain the broken-up description (each entry in the
- // vector is a separate rule) and a stack for keeping track of opening
- // punctuation
- Vector tempRuleList = new Vector<>();
- Stack parenStack = new Stack<>();
-
- int p = 0;
- int ruleStart = 0;
- int c = '\u0000';
- int lastC = '\u0000';
- int lastOpen = '\u0000';
- boolean haveEquals = false;
- boolean havePipe = false;
- boolean sawVarName = false;
- final String charsThatCantPrecedeAsterisk = "=/{(|}*;\u0000";
-
- // if the description doesn't end with a semicolon, tack a semicolon onto the end
- if (description.length() != 0 &&
- description.codePointAt(description.length() - 1) != ';') {
- description = description + ";";
- }
-
- // for each character, do...
- while (p < description.length()) {
- c = description.codePointAt(p);
-
- switch (c) {
- // if the character is a backslash, skip the character that follows it
- // (it'll get treated as a literal character)
- case '\\':
- ++p;
- break;
-
- // if the character is opening punctuation, verify that no nesting
- // rules are broken, and push the character onto the stack
- case '{':
- case '<':
- case '[':
- case '(':
- if (lastOpen == '<') {
- error("Can't nest brackets inside <>", p, description);
- }
- if (lastOpen == '[' && c != '[') {
- error("Can't nest anything in [] but []", p, description);
- }
-
- // if we see < anywhere except on the left-hand side of =,
- // we must be seeing a variable name that was never defined
- if (c == '<' && (haveEquals || havePipe)) {
- error("Unknown variable name", p, description);
- }
-
- lastOpen = c;
- parenStack.push(Character.valueOf((char)c));
- if (c == '<') {
- sawVarName = true;
- }
- break;
-
- // if the character is closing punctuation, verify that it matches the
- // last opening punctuation we saw, and that the brackets contain
- // something, then pop the stack
- case '}':
- case '>':
- case ']':
- case ')':
- char expectedClose = '\u0000';
- switch (lastOpen) {
- case '{':
- expectedClose = '}';
- break;
- case '[':
- expectedClose = ']';
- break;
- case '(':
- expectedClose = ')';
- break;
- case '<':
- expectedClose = '>';
- break;
- }
- if (c != expectedClose) {
- error("Unbalanced parentheses", p, description);
- }
- if (lastC == lastOpen) {
- error("Parens don't contain anything", p, description);
- }
- parenStack.pop();
- if (!parenStack.empty()) {
- lastOpen = parenStack.peek().charValue();
- }
- else {
- lastOpen = '\u0000';
- }
-
- break;
-
- // if the character is an asterisk, make sure it occurs in a place
- // where an asterisk can legally go
- case '*':
- if (charsThatCantPrecedeAsterisk.indexOf(lastC) != -1) {
- error("Misplaced asterisk", p, description);
- }
- break;
-
- // if the character is a question mark, make sure it follows an asterisk
- case '?':
- if (lastC != '*') {
- error("Misplaced ?", p, description);
- }
- break;
-
- // if the character is an equals sign, make sure we haven't seen another
- // equals sign or a slash yet
- case '=':
- if (haveEquals || havePipe) {
- error("More than one = or / in rule", p, description);
- }
- haveEquals = true;
- break;
-
- // if the character is a slash, make sure we haven't seen another slash
- // or an equals sign yet
- case '/':
- if (haveEquals || havePipe) {
- error("More than one = or / in rule", p, description);
- }
- if (sawVarName) {
- error("Unknown variable name", p, description);
- }
- havePipe = true;
- break;
-
- // if the character is an exclamation point, make sure it occurs only
- // at the beginning of a rule
- case '!':
- if (lastC != ';' && lastC != '\u0000') {
- error("! can only occur at the beginning of a rule", p, description);
- }
- break;
-
- // we don't have to do anything special on a period
- case '.':
- break;
-
- // if the character is a syntax character that can only occur
- // inside [], make sure that it does in fact only occur inside [].
- case '^':
- case '-':
- case ':':
- if (lastOpen != '[' && lastOpen != '<') {
- error("Illegal character", p, description);
- }
- break;
-
- // if the character is a semicolon, do the following...
- case ';':
- // make sure the rule contains something and that there are no
- // unbalanced parentheses or brackets
- if (lastC == ';' || lastC == '\u0000') {
- error("Empty rule", p, description);
- }
- if (!parenStack.empty()) {
- error("Unbalanced parenheses", p, description);
- }
-
- if (parenStack.empty()) {
- // if the rule contained an = sign, call processSubstitution()
- // to replace the substitution name with the substitution text
- // wherever it appears in the description
- if (haveEquals) {
- description = processSubstitution(description.substring(ruleStart,
- p), description, p + 1);
- }
- else {
- // otherwise, check to make sure the rule doesn't reference
- // any undefined substitutions
- if (sawVarName) {
- error("Unknown variable name", p, description);
- }
-
- // then add it to tempRuleList
- tempRuleList.addElement(description.substring(ruleStart, p));
- }
-
- // and reset everything to process the next rule
- ruleStart = p + 1;
- haveEquals = havePipe = sawVarName = false;
- }
- break;
-
- // if the character is a vertical bar, check to make sure that it
- // occurs inside a () expression and that the character that precedes
- // it isn't also a vertical bar
- case '|':
- if (lastC == '|') {
- error("Empty alternative", p, description);
- }
- if (parenStack.empty() || lastOpen != '(') {
- error("Misplaced |", p, description);
- }
- break;
-
- // if the character is anything else (escaped characters are
- // skipped and don't make it here), it's an error
- default:
- if (c >= ' ' && c < '\u007f' && !Character.isLetter((char)c)
- && !Character.isDigit((char)c)) {
- error("Illegal character", p, description);
- }
- if (c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
- ++p;
- }
- break;
- }
- lastC = c;
- ++p;
- }
- if (tempRuleList.size() == 0) {
- error("No valid rules in description", p, description);
- }
- return tempRuleList;
- }
-
- /**
- * This function performs variable-name substitutions. First it does syntax
- * checking on the variable-name definition. If it's syntactically valid, it
- * then goes through the remainder of the description and does a simple
- * find-and-replace of the variable name with its text. (The variable text
- * must be enclosed in either [] or () for this to work.)
- */
- protected String processSubstitution(String substitutionRule, String description,
- int startPos) {
- // isolate out the text on either side of the equals sign
- String replace;
- String replaceWith;
- int equalPos = substitutionRule.indexOf('=');
- replace = substitutionRule.substring(0, equalPos);
- replaceWith = substitutionRule.substring(equalPos + 1);
-
- // check to see whether the substitution name is something we've declared
- // to be "special". For RuleBasedBreakIterator itself, this is "<ignore>".
- // This function takes care of any extra processing that has to be done
- // with "special" substitution names.
- handleSpecialSubstitution(replace, replaceWith, startPos, description);
-
- // perform various other syntax checks on the rule
- if (replaceWith.length() == 0) {
- error("Nothing on right-hand side of =", startPos, description);
- }
- if (replace.length() == 0) {
- error("Nothing on left-hand side of =", startPos, description);
- }
- if (replace.length() == 2 && replace.charAt(0) != '\\') {
- error("Illegal left-hand side for =", startPos, description);
- }
- if (replace.length() >= 3 && replace.charAt(0) != '<' &&
- replace.codePointBefore(equalPos) != '>') {
- error("Illegal left-hand side for =", startPos, description);
- }
- if (!(replaceWith.charAt(0) == '[' &&
- replaceWith.charAt(replaceWith.length() - 1) == ']') &&
- !(replaceWith.charAt(0) == '(' &&
- replaceWith.charAt(replaceWith.length() - 1) == ')')) {
- error("Illegal right-hand side for =", startPos, description);
- }
-
- // now go through the rest of the description (which hasn't been broken up
- // into separate rules yet) and replace every occurrence of the
- // substitution name with the substitution body
- StringBuffer result = new StringBuffer();
- result.append(description.substring(0, startPos));
- int lastPos = startPos;
- int pos = description.indexOf(replace, startPos);
- while (pos != -1) {
- result.append(description.substring(lastPos, pos));
- result.append(replaceWith);
- lastPos = pos + replace.length();
- pos = description.indexOf(replace, lastPos);
- }
- result.append(description.substring(lastPos));
- return result.toString();
- }
-
-    /**
-     * This function defines a protocol for handling substitution names that
-     * are "special," i.e., that have some property beyond just being
-     * substitutions. At the RuleBasedBreakIterator level, we have one
-     * special substitution name, "<ignore>". Subclasses can override this
-     * function to add more. Any special processing that has to go on beyond
-     * that which is done by the normal substitution-processing code is done
-     * here.
-     */
-    protected void handleSpecialSubstitution(String replace, String replaceWith,
-                                             int startPos, String description) {
-        // if we get a definition for a substitution called "ignore", it defines
-        // the ignore characters for the iterator. Check to make sure the expression
-        // is a [] expression, and if it is, parse it and store the characters off
-        // to the side.
-        if (replace.equals("<ignore>")) {
-            if (replaceWith.charAt(0) == '(') {
-                error("Ignore group can't be enclosed in (", startPos, description);
-            }
-            ignoreChars = CharSet.parseString(replaceWith);
-        }
-    }
-
- /**
- * This function builds the character category table. On entry,
- * tempRuleList is a vector of break rules that has had variable names substituted.
- * On exit, the charCategoryTable data member has been initialized to hold the
- * character category table, and tempRuleList's rules have been munged to contain
- * character category numbers everywhere a literal character or a [] expression
- * originally occurred.
- */
- @SuppressWarnings("fallthrough")
- protected void buildCharCategories(Vector tempRuleList) {
- int bracketLevel = 0;
- int p = 0;
- int lineNum = 0;
-
- // build hash table of every literal character or [] expression in the rule list
- // and use CharSet.parseString() to derive a CharSet object representing the
- // characters each refers to
- expressions = new Hashtable<>();
- while (lineNum < tempRuleList.size()) {
- String line = tempRuleList.elementAt(lineNum);
- p = 0;
- while (p < line.length()) {
- int c = line.codePointAt(p);
- switch (c) {
- // skip over all syntax characters except [
- case '{': case '}': case '(': case ')': case '*': case '.':
- case '/': case '|': case ';': case '?': case '!':
- break;
-
- // for [, find the matching ] (taking nested [] pairs into account)
- // and add the whole expression to the expression list
- case '[':
- int q = p + 1;
- ++bracketLevel;
- while (q < line.length() && bracketLevel != 0) {
- c = line.codePointAt(q);
- switch (c) {
- case '\\':
- q++;
- break;
- case '[':
- ++bracketLevel;
- break;
- case ']':
- --bracketLevel;
- break;
- }
- q = q + Character.charCount(c);
- }
- if (expressions.get(line.substring(p, q)) == null) {
- expressions.put(line.substring(p, q), CharSet.parseString(line.substring(p, q)));
- }
- p = q - 1;
- break;
-
- // for \ sequences, just move to the next character and treat
- // it as a single character
- case '\\':
- ++p;
- c = line.codePointAt(p);
- // DON'T break; fall through into "default" clause
-
- // for an isolated single character, add it to the expression list
- default:
- expressions.put(line.substring(p, p + 1), CharSet.parseString(line.substring(p, p + 1)));
- break;
- }
- p += Character.charCount(line.codePointAt(p));
- }
- ++lineNum;
- }
- // dump CharSet's internal expression cache
- CharSet.releaseExpressionCache();
-
- // create the temporary category table (which is a vector of CharSet objects)
- categories = new Vector<>();
- if (ignoreChars != null) {
- categories.addElement(ignoreChars);
- }
- else {
- categories.addElement(new CharSet());
- }
- ignoreChars = null;
-
- // this is a hook to allow subclasses to add categories on their own
- mungeExpressionList(expressions);
-
- // Derive the character categories. Go through the existing character categories
- // looking for overlap. Any time there's overlap, we create a new character
- // category for the characters that overlapped and remove them from their original
- // category. At the end, any characters that are left in the expression haven't
- // been mentioned in any category, so another new category is created for them.
- // For example, if the first expression is [abc], then a, b, and c will be placed
- // into a single character category. If the next expression is [bcd], we will first
- // remove b and c from their existing category (leaving a behind), create a new
- // category for b and c, and then create another new category for d (which hadn't
- // been mentioned in the previous expression).
- // At no time should a character ever occur in more than one character category.
-
- // for each expression in the expressions list, do...
- for (Enumeration