Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,18 @@ static void parseSourceFile(
&& (propInfo = propInfoSet.iterator().next()).special
== SpecialProperty.None
&& propInfo.getFieldNumber(indexUnicodeProperties.ucdVersion) == 1) {
if (fileName.equals("math/*/MathClass")
&& indexUnicodeProperties.ucdVersion.compareTo(
VersionInfo.UNICODE_6_3)
<= 0) {
parser =
parser.withLinePreprocessor(
s ->
s.startsWith("1D455=210E;")
|| s.equals("code point;class")
? "#" + s
: s);
}
parseSimpleFieldFile(
parser.withMissing(true),
propInfo,
Expand All @@ -674,6 +686,20 @@ static void parseSourceFile(
? null
: nextProperties.getProperty(propInfo.property));
} else {
if (fileName.equals("math/*/MathClassEx")
&& indexUnicodeProperties.ucdVersion.compareTo(
VersionInfo.UNICODE_6_3)
<= 0) {
parser =
parser.withLinePreprocessor(
s ->
s.startsWith("FE61-FE68;")
? s.replaceFirst(
"FE61-FE68;", "FE61..FE68;")
: s.startsWith("1D455=210E;")
? "#" + s
: s);
}
parseFieldFile(
parser.withMissing(true),
indexUnicodeProperties,
Expand Down Expand Up @@ -1510,6 +1536,38 @@ private static void parseFields(
value = "No";
}
}
if ((propInfo.property == UcdProperty.Math_Entity_Name
|| propInfo.property == UcdProperty.Math_Entity_Set)
&& indexUnicodeProperties.ucdVersion.compareTo(Utility.UTR25_REVISION_16)
< 0) {
merger = IndexUnicodeProperties.MULTIVALUED_JOINER;
}
if (propInfo.property == UcdProperty.Math_Descriptive_Comments
&& indexUnicodeProperties.ucdVersion.compareTo(Utility.UTR25_REVISION_16)
< 0) {
merger = new PropertyUtilities.NullIgnorer();
}
if (propInfo.property == UcdProperty.Math_Class_Ex
&& indexUnicodeProperties.ucdVersion.compareTo(Utility.UTR25_REVISION_16)
< 0) {
merger = new PropertyUtilities.RedundancyIgnorer();
}
if (propInfo.property == UcdProperty.Math_Class_Ex
&& indexUnicodeProperties.ucdVersion.compareTo(VersionInfo.UNICODE_6_0) < 0
&& (line.getRange().start == 0x2020 || line.getRange().start == 0x2021)
&& line.getRange().end == line.getRange().start
&& value.equals("N")) {
value = "R";
}
if (propInfo.property == UcdProperty.Math_Class_Ex
&& indexUnicodeProperties.ucdVersion.compareTo(VersionInfo.UNICODE_6_1) < 0
&& value.isEmpty()) {
// MathClassEx-12 has
// 27CA;;;;;;VERTICAL BAR WITH HORIZONTAL STROKE
// MathClassEx-11 has
// 21EA..21F3;;⇪..⇳;;;; 21EA-21F3 are keyboard
value = "None";
}
propInfo.put(
data,
line.getMissingSet(),
Expand Down Expand Up @@ -1646,6 +1704,27 @@ private static void parseSimpleFieldFile(
}
}
continue;
} else if (propInfo.property == UcdProperty.Math_Class
&& version.compareTo(VersionInfo.UNICODE_6_0) < 0) {
// MathClass-11 had conflicting assignments for these two characters. Instead
// of making Math_Class multivalued, keep the one that stayed (R), and discard
// the N.
if ((line.getRange().start == 0x2020 || line.getRange().start == 0x2021)
&& line.getRange().start == line.getRange().end
&& line.getParts()[1].equals("N")) {
continue;
}
// MathClass-9 had the same problem for U+0021 ! as well.
if (version.compareTo(VersionInfo.UNICODE_5_1) < 0
&& line.getRange().start == 0x0021
&& line.getRange().start == line.getRange().end
&& line.getParts()[1].equals("P")) {
continue;
}
// MathClass-11 had a line without a value, 21EA..21F3;
if (line.getParts()[1].isEmpty()) {
line.getParts()[1] = "None";
}
} else if (line.getParts().length != 2
&& version.compareTo(VersionInfo.UNICODE_3_0_1) > 0) {
// Unicode 3.0 and earlier had name comments as an extra field.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.unicode.props;

import com.google.common.base.Objects;
import com.ibm.icu.impl.UnicodeMap;
import com.ibm.icu.text.UnicodeSet;
import java.util.Collection;
Expand Down Expand Up @@ -37,6 +38,34 @@ public String merge(String first, String second) {
}
}

/**
 * Merge strategy that lets a {@code null} incoming value pass silently, keeping the existing
 * value. Any non-null incoming value is rejected as a conflict.
 */
public static final class NullIgnorer implements Merge<String> {
    public NullIgnorer() {}

    /**
     * @param first the value reported as "old" in the conflict message
     * @param second the value reported as "new" in the conflict message
     * @return {@code first}, when {@code second} is {@code null}
     * @throws UnicodePropertyException when {@code second} is not {@code null}
     */
    @Override
    public String merge(String first, String second) {
        if (second != null) {
            throw new UnicodePropertyException(
                    "Key already present in UnicodeMap:\told: " + first + ",\tnew: " + second);
        }
        return first;
    }
}

/**
 * Merge strategy that accepts an incoming value only when it equals the existing one (two
 * {@code null}s count as equal). Any differing value is rejected as a conflict.
 */
public static final class RedundancyIgnorer implements Merge<String> {
    public RedundancyIgnorer() {}

    /**
     * @param first the value reported as "old" in the conflict message
     * @param second the value reported as "new" in the conflict message
     * @return {@code first}, when both values are equal
     * @throws UnicodePropertyException when the two values differ
     */
    @Override
    public String merge(String first, String second) {
        final boolean redundant = Objects.equal(first, second);
        if (!redundant) {
            throw new UnicodePropertyException(
                    "Key already present in UnicodeMap:\told: " + first + ",\tnew: " + second);
        }
        return first;
    }
}

static final <K, V, M extends Map<K, V>> M putNew(M map, K key, V value) {
final V oldValue = map.get(key);
if (oldValue != null) {
Expand Down
27 changes: 25 additions & 2 deletions unicodetools/src/main/java/org/unicode/props/UcdLineParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import java.util.ArrayList;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.util.RegexUtilities;
Expand Down Expand Up @@ -85,18 +86,21 @@ public enum Contents {
private final ArrayList<String> partsList = new ArrayList<>();
private String[] parts = null;
private final IntRange intRange = new IntRange();
private final Function<String, String> linePreprocessor;

// Captures the parser configuration for one iteration pass: a Matcher is created from
// splitPattern (initially on the empty string) and every other argument is stored in the
// field of the same name.
// linePreprocessor appears to be optional: hasNext() checks it for null before applying it.
// NOTE(review): this view shows both the former closing parameter line
// (`UcdFileStats stats) {`) and its replacement with the added linePreprocessor parameter;
// only the latter belongs in compilable source — confirm against the real file.
UcdLine(
Pattern splitPattern,
boolean withRange,
boolean withMissing,
Iterator<String> rawLines,
UcdFileStats stats) {
UcdFileStats stats,
Function<String, String> linePreprocessor) {
splitter = splitPattern.matcher("");
this.withRange = withRange;
this.withMissing = withMissing;
this.rawLines = rawLines;
this.stats = stats;
this.linePreprocessor = linePreprocessor;
}

@Override
Expand All @@ -117,6 +121,9 @@ public boolean hasNext() {
|| line.startsWith(">>>>>>>")) {
line2 = "";
}
if (linePreprocessor != null) {
line2 = linePreprocessor.apply(line2);
}
++stats.lineCount;
final int hashPos = line2.indexOf('#');
if (hashPos >= 0) {
Expand Down Expand Up @@ -223,6 +230,7 @@ public UnicodeSet getMissingSet() {
private boolean withTabs = false;
private boolean withRange = true;
private boolean withMissing = false;
private Function<String, String> linePreprocessor;
private final Iterable<String> rawLines;
private final UcdFileStats stats = new UcdFileStats();

Expand All @@ -245,10 +253,25 @@ public UcdLineParser withMissing(boolean m) {
return this;
}

/**
 * Sets a line preprocessor to which each line is fed before removing comments, splitting
 * fields, and decoding ranges.
 *
 * <p>This makes it possible to correct lines with ill-formed ranges. For corrections affecting
 * only subsequent fields rather than the range, prefer handling in the parse* functions in
 * PropertyParsingInfo.
 *
 * @param f the function applied to each line
 * @return this parser, so that configuration calls can be chained
 */
public UcdLineParser withLinePreprocessor(Function<String, String> f) {
    this.linePreprocessor = f;
    return this;
}

// Creates a fresh UcdLine iterator over rawLines, configured with the TAB or SEMICOLON split
// pattern (chosen by withTabs), the range/missing flags, the shared stats object, and the
// line preprocessor set on this parser.
// NOTE(review): this view shows both the former five-argument line and the replacement
// six-argument list; only the latter belongs in compilable source — confirm against the
// real file.
@Override
public Iterator<UcdLine> iterator() {
return new UcdLine(
withTabs ? TAB : SEMICOLON, withRange, withMissing, rawLines.iterator(), stats);
withTabs ? TAB : SEMICOLON,
withRange,
withMissing,
rawLines.iterator(),
stats,
linePreprocessor);
}

public int getLineCount() {
Expand Down
30 changes: 30 additions & 0 deletions unicodetools/src/main/java/org/unicode/props/UcdProperty.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
import org.unicode.props.UcdPropertyValues.Joining_Group_Values;
import org.unicode.props.UcdPropertyValues.Joining_Type_Values;
import org.unicode.props.UcdPropertyValues.Line_Break_Values;
import org.unicode.props.UcdPropertyValues.Math_Class_Ex_Values;
import org.unicode.props.UcdPropertyValues.Math_Class_Values;
import org.unicode.props.UcdPropertyValues.NFC_Quick_Check_Values;
import org.unicode.props.UcdPropertyValues.NFD_Quick_Check_Values;
import org.unicode.props.UcdPropertyValues.NFKC_Quick_Check_Values;
Expand Down Expand Up @@ -124,6 +126,22 @@ public enum UcdProperty {
Emoji_SB(PropertyType.Miscellaneous, DerivedPropertyStatus.UCDNonProperty, "ESB"),
ISO_Comment(PropertyType.Miscellaneous, DerivedPropertyStatus.Approved, "isc"),
Jamo_Short_Name(PropertyType.Miscellaneous, DerivedPropertyStatus.Approved, "JSN"),
Math_Descriptive_Comments(
PropertyType.Miscellaneous,
DerivedPropertyStatus.NonUCDNonProperty,
"Math_Descriptive_Comments"),
Math_Entity_Name(
PropertyType.Miscellaneous,
DerivedPropertyStatus.NonUCDNonProperty,
null,
ValueCardinality.Unordered,
"Math_Entity_Name"),
Math_Entity_Set(
PropertyType.Miscellaneous,
DerivedPropertyStatus.NonUCDNonProperty,
null,
ValueCardinality.Unordered,
"Math_Entity_Set"),
Name(PropertyType.Miscellaneous, DerivedPropertyStatus.Approved, "na"),
Name_Alias(
PropertyType.Miscellaneous,
Expand Down Expand Up @@ -713,6 +731,18 @@ public enum UcdProperty {
Line_Break_Values.class,
null,
"lb"),
Math_Class(
PropertyType.Enumerated,
DerivedPropertyStatus.NonUCDProperty,
Math_Class_Values.class,
null,
"Math_Class"),
Math_Class_Ex(
PropertyType.Enumerated,
DerivedPropertyStatus.NonUCDNonProperty,
Math_Class_Ex_Values.class,
null,
"Math_Class_Ex"),
NFC_Quick_Check(
PropertyType.Enumerated,
DerivedPropertyStatus.Approved,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1817,6 +1817,96 @@ public static Line_Break_Values forName(String name) {
}

// Lowercase_Mapping
/**
 * Values of the enumerated Math_Class property. Each constant is declared with its short name
 * and, where applicable, additional accepted names.
 */
public enum Math_Class_Values implements Named {
    None("None"),
    Normal("N"),
    Alphabetic("A"),
    Binary("B"),
    Closing("C"),
    Diacritic("D"),
    Fence("F"),
    Glyph_Part("G"),
    Invisible("I"),
    Large("L"),
    Opening("O"),
    Punctuation("P"),
    Relation("R", "R?"),
    Space("S"),
    Unary("U"),
    Vary("V"),
    Special("X");

    /** Name record for this constant, built from the short name and any other names. */
    private final PropertyNames<Math_Class_Values> names;

    Math_Class_Values(String shortName, String... otherNames) {
        this.names = new PropertyNames<>(Math_Class_Values.class, this, shortName, otherNames);
    }

    @Override
    public PropertyNames<Math_Class_Values> getNames() {
        return this.names;
    }

    @Override
    public String getShortName() {
        return this.names.getShortName();
    }

    // Declared after the constants, so every constant already exists when the matcher is built.
    private static final NameMatcher<Math_Class_Values> NAME_MATCHER =
            PropertyNames.getNameToEnums(Math_Class_Values.class);

    /** Looks up the constant registered under {@code name} in the name matcher. */
    public static Math_Class_Values forName(String name) {
        return NAME_MATCHER.get(name);
    }
}

/**
 * Values of the enumerated Math_Class_Ex property. Each constant is declared with its short
 * name and, where applicable, additional accepted names.
 */
public enum Math_Class_Ex_Values implements Named {
    None("None"),
    Normal("N"),
    Alphabetic("A"),
    Binary("B"),
    Closing("C"),
    Diacritic("D"),
    Fence("F"),
    Glyph_Part("G"),
    Large("L"),
    Opening("O"),
    Punctuation("P"),
    Relation("R", "R?"),
    Space("S"),
    Unary("U"),
    Vary("V"),
    Special("X");

    /** Name record for this constant, built from the short name and any other names. */
    private final PropertyNames<Math_Class_Ex_Values> names;

    Math_Class_Ex_Values(String shortName, String... otherNames) {
        this.names = new PropertyNames<>(Math_Class_Ex_Values.class, this, shortName, otherNames);
    }

    @Override
    public PropertyNames<Math_Class_Ex_Values> getNames() {
        return this.names;
    }

    @Override
    public String getShortName() {
        return this.names.getShortName();
    }

    // Declared after the constants, so every constant already exists when the matcher is built.
    private static final NameMatcher<Math_Class_Ex_Values> NAME_MATCHER =
            PropertyNames.getNameToEnums(Math_Class_Ex_Values.class);

    /** Looks up the constant registered under {@code name} in the name matcher. */
    public static Math_Class_Ex_Values forName(String name) {
        return NAME_MATCHER.get(name);
    }
}

// Math_Descriptive_Comments
// Math_Entity_Name
// Math_Entity_Set
// Name
// Name_Alias
// Named_Sequences
Expand Down
Loading