libunibreak  4.3
linebreakdef.h
Go to the documentation of this file.
1 /* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2 
3 /*
4  * Line breaking in a Unicode sequence. Designed to be used in a
5  * generic text renderer.
6  *
7  * Copyright (C) 2008-2020 Wu Yongwei <wuyongwei at gmail dot com>
8  * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
9  *
10  * This software is provided 'as-is', without any express or implied
11  * warranty. In no event will the author be held liable for any damages
12  * arising from the use of this software.
13  *
14  * Permission is granted to anyone to use this software for any purpose,
15  * including commercial applications, and to alter it and redistribute
16  * it freely, subject to the following restrictions:
17  *
18  * 1. The origin of this software must not be misrepresented; you must
19  * not claim that you wrote the original software. If you use this
20  * software in a product, an acknowledgement in the product
21  * documentation would be appreciated but is not required.
22  * 2. Altered source versions must be plainly marked as such, and must
23  * not be misrepresented as being the original software.
24  * 3. This notice may not be removed or altered from any source
25  * distribution.
26  *
27  * The main reference is Unicode Standard Annex 14 (UAX #14):
28  * <URL:http://www.unicode.org/reports/tr14/>
29  *
30  * When this library was designed, this annex was at Revision 19, for
31  * Unicode 5.0.0:
32  * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
33  *
34  * This library has been updated according to Revision 45, for
35  * Unicode 13.0.0:
36  * <URL:http://www.unicode.org/reports/tr14/tr14-45.html>
37  *
38  * The Unicode Terms of Use are available at
39  * <URL:http://www.unicode.org/copyright.html>
40  */
41 
52 #include "unibreakdef.h"
53 
59 {
60  /* This is used to signal an error condition. */
63  /* The following break classes are treated in the pair table. */
97  /* The following break class is treated in the pair table, but it is
98  * not part of Table 2 of UAX #14-37. */
101  /* The following break classes are not treated in the pair table */
111  LBP_XX
112 };
113 
119 {
122  enum LineBreakClass prop;
123 };
124 
130 {
131  const char *lang;
132  size_t namelen;
133  const struct LineBreakProperties *lbp;
134 };
135 
141 {
142  const char *lang;
143  const struct LineBreakProperties *lbpLang;
145  enum LineBreakClass lbcCur;
146  enum LineBreakClass lbcNew;
147  enum LineBreakClass lbcLast;
148  bool fLb8aZwj;
151  int cLb30aRI;
152 };
153 
154 /* Declarations */
155 extern const struct LineBreakProperties lb_prop_default[];
156 extern const struct LineBreakPropertiesLang lb_prop_lang_map[];
157 
158 /* Function Prototype */
159 void lb_init_break_context(
160  struct LineBreakContext *lbpCtx,
161  utf32_t ch,
162  const char *lang);
163 int lb_process_next_char(
164  struct LineBreakContext *lbpCtx,
165  utf32_t ch);
166 void set_linebreaks(
167  const void *s,
168  size_t len,
169  const char *lang,
170  char *brks,
171  get_next_char_t get_next_char);
LBP_CB
@ LBP_CB
Contingent break.
Definition: linebreakdef.h:99
utf32_t
unsigned int utf32_t
Type for UTF-32 data points.
Definition: unibreakbase.h:49
get_next_char_t
utf32_t(* get_next_char_t)(const void *, size_t, size_t *)
Abstract function interface for ub_get_next_char_utf8, ub_get_next_char_utf16, and ub_get_next_char_utf32.
Definition: unibreakdef.h:65
LBP_RI
@ LBP_RI
Regional indicator.
Definition: linebreakdef.h:92
LineBreakContext::fLb21aHebrew
bool fLb21aHebrew
Flag for Hebrew letters (LB21a)
Definition: linebreakdef.h:150
LBP_SY
@ LBP_SY
Symbols allowing break after.
Definition: linebreakdef.h:71
LBP_Undefined
@ LBP_Undefined
Undefined.
Definition: linebreakdef.h:61
LBP_AI
@ LBP_AI
Ambiguous (alphabetic or ideograph)
Definition: linebreakdef.h:102
LBP_SA
@ LBP_SA
South-East Asian.
Definition: linebreakdef.h:108
LineBreakClass
LineBreakClass
Line break classes.
Definition: linebreakdef.h:59
LBP_AL
@ LBP_AL
Alphabetic.
Definition: linebreakdef.h:76
LBP_PR
@ LBP_PR
Prefix.
Definition: linebreakdef.h:73
LBP_H3
@ LBP_H3
Hangul LVT.
Definition: linebreakdef.h:88
LBP_EM
@ LBP_EM
Emoji modifier.
Definition: linebreakdef.h:94
LBP_EX
@ LBP_EX
Exclamation/Interrogation.
Definition: linebreakdef.h:70
LineBreakPropertiesLang::lang
const char * lang
Language name.
Definition: linebreakdef.h:131
LineBreakContext::lbcCur
enum LineBreakClass lbcCur
Breaking class of current codepoint.
Definition: linebreakdef.h:145
LBP_GL
@ LBP_GL
Glue.
Definition: linebreakdef.h:68
LineBreakContext::cLb30aRI
int cLb30aRI
Count of RI characters (LB30a)
Definition: linebreakdef.h:151
LineBreakPropertiesLang::lbp
const struct LineBreakProperties * lbp
Pointer to associated data.
Definition: linebreakdef.h:133
LBP_NS
@ LBP_NS
Non-starters.
Definition: linebreakdef.h:69
LBP_CM
@ LBP_CM
Combining marks.
Definition: linebreakdef.h:85
LineBreakContext::fLb8aZwj
bool fLb8aZwj
Flag for ZWJ (LB8a)
Definition: linebreakdef.h:148
LBP_JT
@ LBP_JT
Hangul T Jamo.
Definition: linebreakdef.h:91
LBP_ID
@ LBP_ID
Ideographic.
Definition: linebreakdef.h:78
LineBreakPropertiesLang::namelen
size_t namelen
Length of name to match.
Definition: linebreakdef.h:132
LBP_JV
@ LBP_JV
Hangul V Jamo.
Definition: linebreakdef.h:90
LBP_XX
@ LBP_XX
Unknown.
Definition: linebreakdef.h:111
LineBreakContext
Context representing internal state of the line breaking algorithm.
Definition: linebreakdef.h:141
LBP_BK
@ LBP_BK
Break (mandatory)
Definition: linebreakdef.h:103
LBP_B2
@ LBP_B2
Break on either side (but not between a pair)
Definition: linebreakdef.h:83
lb_init_break_context
void lb_init_break_context(struct LineBreakContext *lbpCtx, utf32_t ch, const char *lang)
Initializes line breaking context for a given language.
Definition: linebreak.c:678
LBP_NL
@ LBP_NL
Next line.
Definition: linebreakdef.h:107
LineBreakPropertiesLang
Struct for association of language-specific line breaking properties with language names.
Definition: linebreakdef.h:130
LBP_IN
@ LBP_IN
Inseparable characters.
Definition: linebreakdef.h:79
LineBreakProperties::start
utf32_t start
Start codepoint.
Definition: linebreakdef.h:120
LBP_OP
@ LBP_OP
Opening punctuation.
Definition: linebreakdef.h:64
LBP_CL
@ LBP_CL
Closing punctuation.
Definition: linebreakdef.h:65
LBP_LF
@ LBP_LF
Line feed.
Definition: linebreakdef.h:106
lb_prop_lang_map
const struct LineBreakPropertiesLang lb_prop_lang_map[]
Association data of language-specific line breaking properties with language names.
Definition: linebreakdef.c:117
LBP_PO
@ LBP_PO
Postfix.
Definition: linebreakdef.h:74
set_linebreaks
void set_linebreaks(const void *s, size_t len, const char *lang, char *brks, get_next_char_t get_next_char)
Sets the line breaking information for a generic input string.
Definition: linebreak.c:784
LineBreakContext::lang
const char * lang
Language name.
Definition: linebreakdef.h:142
LBP_CP
@ LBP_CP
Closing parenthesis.
Definition: linebreakdef.h:66
LineBreakContext::lbcLast
enum LineBreakClass lbcLast
Breaking class of last codepoint.
Definition: linebreakdef.h:147
LBP_NU
@ LBP_NU
Numeric.
Definition: linebreakdef.h:75
LBP_EB
@ LBP_EB
Emoji base.
Definition: linebreakdef.h:93
LineBreakProperties::prop
enum LineBreakClass prop
The line breaking property.
Definition: linebreakdef.h:122
LBP_BA
@ LBP_BA
Break after.
Definition: linebreakdef.h:81
LBP_QU
@ LBP_QU
Ambiguous quotation.
Definition: linebreakdef.h:67
lb_process_next_char
int lb_process_next_char(struct LineBreakContext *lbpCtx, utf32_t ch)
Updates LineBreakContext for the next codepoint and returns the detected break.
Definition: linebreak.c:709
LBP_H2
@ LBP_H2
Hangul LV.
Definition: linebreakdef.h:87
LBP_SG
@ LBP_SG
Surrogates.
Definition: linebreakdef.h:109
LBP_SP
@ LBP_SP
Space.
Definition: linebreakdef.h:110
LBP_JL
@ LBP_JL
Hangul L Jamo.
Definition: linebreakdef.h:89
LBP_ZW
@ LBP_ZW
Zero-width space.
Definition: linebreakdef.h:84
LBP_CJ
@ LBP_CJ
Conditional Japanese starter.
Definition: linebreakdef.h:104
LineBreakContext::lbpLang
const struct LineBreakProperties * lbpLang
Pointer to LineBreakProperties.
Definition: linebreakdef.h:143
LineBreakContext::lbcNew
enum LineBreakClass lbcNew
Breaking class of next codepoint.
Definition: linebreakdef.h:146
LineBreakProperties
Struct for entries of line break properties.
Definition: linebreakdef.h:119
LBP_CR
@ LBP_CR
Carriage return.
Definition: linebreakdef.h:105
unibreakdef.h
LineBreakProperties::end
utf32_t end
End codepoint, inclusive.
Definition: linebreakdef.h:121
lb_prop_default
const struct LineBreakProperties lb_prop_default[]
Default line breaking properties, as obtained from the Unicode website.
Definition: linebreakdata.c:9
LBP_BB
@ LBP_BB
Break before.
Definition: linebreakdef.h:82
LineBreakContext::fLb10LeadSpace
bool fLb10LeadSpace
Flag for leading space (LB10)
Definition: linebreakdef.h:149
LBP_ZWJ
@ LBP_ZWJ
Zero width joiner.
Definition: linebreakdef.h:95
LBP_HL
@ LBP_HL
Hebrew letter.
Definition: linebreakdef.h:77
LBP_WJ
@ LBP_WJ
Word joiner.
Definition: linebreakdef.h:86
LBP_HY
@ LBP_HY
Hyphen.
Definition: linebreakdef.h:80
LBP_IS
@ LBP_IS
Infix separator.
Definition: linebreakdef.h:72