root/fs/hfsplus/unicode.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. case_fold
  2. hfsplus_strcasecmp
  3. hfsplus_strcmp
  4. hfsplus_compose_lookup
  5. hfsplus_uni2asc
  6. asc2unichar
  7. hfsplus_decompose_nonhangul
  8. hfsplus_try_decompose_hangul
  9. decompose_unichar
  10. hfsplus_asc2uni
  11. hfsplus_hash_dentry
  12. hfsplus_compare_dentry

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  *  linux/fs/hfsplus/unicode.c
   4  *
   5  * Copyright (C) 2001
   6  * Brad Boyer (flar@allandria.com)
   7  * (C) 2003 Ardis Technologies <roman@ardistech.com>
   8  *
   9  * Handler routines for unicode strings
  10  */
  11 
  12 #include <linux/types.h>
  13 #include <linux/nls.h>
  14 #include "hfsplus_fs.h"
  15 #include "hfsplus_raw.h"
  16 
  17 /* Fold the case of a unicode char, given the 16 bit value */
  18 /* Returns folded char, or 0 if ignorable */
  19 static inline u16 case_fold(u16 c)
  20 {
  21         u16 tmp;
  22 
  23         tmp = hfsplus_case_fold_table[c >> 8];
  24         if (tmp)
  25                 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
  26         else
  27                 tmp = c;
  28         return tmp;
  29 }
  30 
  31 /* Compare unicode strings, return values like normal strcmp */
  32 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
  33                        const struct hfsplus_unistr *s2)
  34 {
  35         u16 len1, len2, c1, c2;
  36         const hfsplus_unichr *p1, *p2;
  37 
  38         len1 = be16_to_cpu(s1->length);
  39         len2 = be16_to_cpu(s2->length);
  40         p1 = s1->unicode;
  41         p2 = s2->unicode;
  42 
  43         while (1) {
  44                 c1 = c2 = 0;
  45 
  46                 while (len1 && !c1) {
  47                         c1 = case_fold(be16_to_cpu(*p1));
  48                         p1++;
  49                         len1--;
  50                 }
  51                 while (len2 && !c2) {
  52                         c2 = case_fold(be16_to_cpu(*p2));
  53                         p2++;
  54                         len2--;
  55                 }
  56 
  57                 if (c1 != c2)
  58                         return (c1 < c2) ? -1 : 1;
  59                 if (!c1 && !c2)
  60                         return 0;
  61         }
  62 }
  63 
  64 /* Compare names as a sequence of 16-bit unsigned integers */
  65 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
  66                    const struct hfsplus_unistr *s2)
  67 {
  68         u16 len1, len2, c1, c2;
  69         const hfsplus_unichr *p1, *p2;
  70         int len;
  71 
  72         len1 = be16_to_cpu(s1->length);
  73         len2 = be16_to_cpu(s2->length);
  74         p1 = s1->unicode;
  75         p2 = s2->unicode;
  76 
  77         for (len = min(len1, len2); len > 0; len--) {
  78                 c1 = be16_to_cpu(*p1);
  79                 c2 = be16_to_cpu(*p2);
  80                 if (c1 != c2)
  81                         return c1 < c2 ? -1 : 1;
  82                 p1++;
  83                 p2++;
  84         }
  85 
  86         return len1 < len2 ? -1 :
  87                len1 > len2 ? 1 : 0;
  88 }
  89 
  90 
  91 #define Hangul_SBase    0xac00
  92 #define Hangul_LBase    0x1100
  93 #define Hangul_VBase    0x1161
  94 #define Hangul_TBase    0x11a7
  95 #define Hangul_SCount   11172
  96 #define Hangul_LCount   19
  97 #define Hangul_VCount   21
  98 #define Hangul_TCount   28
  99 #define Hangul_NCount   (Hangul_VCount * Hangul_TCount)
 100 
 101 
 102 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
 103 {
 104         int i, s, e;
 105 
 106         s = 1;
 107         e = p[1];
 108         if (!e || cc < p[s * 2] || cc > p[e * 2])
 109                 return NULL;
 110         do {
 111                 i = (s + e) / 2;
 112                 if (cc > p[i * 2])
 113                         s = i + 1;
 114                 else if (cc < p[i * 2])
 115                         e = i - 1;
 116                 else
 117                         return hfsplus_compose_table + p[i * 2 + 1];
 118         } while (s <= e);
 119         return NULL;
 120 }
 121 
 122 int hfsplus_uni2asc(struct super_block *sb,
 123                 const struct hfsplus_unistr *ustr,
 124                 char *astr, int *len_p)
 125 {
 126         const hfsplus_unichr *ip;
 127         struct nls_table *nls = HFSPLUS_SB(sb)->nls;
 128         u8 *op;
 129         u16 cc, c0, c1;
 130         u16 *ce1, *ce2;
 131         int i, len, ustrlen, res, compose;
 132 
 133         op = astr;
 134         ip = ustr->unicode;
 135         ustrlen = be16_to_cpu(ustr->length);
 136         len = *len_p;
 137         ce1 = NULL;
 138         compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
 139 
 140         while (ustrlen > 0) {
 141                 c0 = be16_to_cpu(*ip++);
 142                 ustrlen--;
 143                 /* search for single decomposed char */
 144                 if (likely(compose))
 145                         ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
 146                 if (ce1)
 147                         cc = ce1[0];
 148                 else
 149                         cc = 0;
 150                 if (cc) {
 151                         /* start of a possibly decomposed Hangul char */
 152                         if (cc != 0xffff)
 153                                 goto done;
 154                         if (!ustrlen)
 155                                 goto same;
 156                         c1 = be16_to_cpu(*ip) - Hangul_VBase;
 157                         if (c1 < Hangul_VCount) {
 158                                 /* compose the Hangul char */
 159                                 cc = (c0 - Hangul_LBase) * Hangul_VCount;
 160                                 cc = (cc + c1) * Hangul_TCount;
 161                                 cc += Hangul_SBase;
 162                                 ip++;
 163                                 ustrlen--;
 164                                 if (!ustrlen)
 165                                         goto done;
 166                                 c1 = be16_to_cpu(*ip) - Hangul_TBase;
 167                                 if (c1 > 0 && c1 < Hangul_TCount) {
 168                                         cc += c1;
 169                                         ip++;
 170                                         ustrlen--;
 171                                 }
 172                                 goto done;
 173                         }
 174                 }
 175                 while (1) {
 176                         /* main loop for common case of not composed chars */
 177                         if (!ustrlen)
 178                                 goto same;
 179                         c1 = be16_to_cpu(*ip);
 180                         if (likely(compose))
 181                                 ce1 = hfsplus_compose_lookup(
 182                                         hfsplus_compose_table, c1);
 183                         if (ce1)
 184                                 break;
 185                         switch (c0) {
 186                         case 0:
 187                                 c0 = 0x2400;
 188                                 break;
 189                         case '/':
 190                                 c0 = ':';
 191                                 break;
 192                         }
 193                         res = nls->uni2char(c0, op, len);
 194                         if (res < 0) {
 195                                 if (res == -ENAMETOOLONG)
 196                                         goto out;
 197                                 *op = '?';
 198                                 res = 1;
 199                         }
 200                         op += res;
 201                         len -= res;
 202                         c0 = c1;
 203                         ip++;
 204                         ustrlen--;
 205                 }
 206                 ce2 = hfsplus_compose_lookup(ce1, c0);
 207                 if (ce2) {
 208                         i = 1;
 209                         while (i < ustrlen) {
 210                                 ce1 = hfsplus_compose_lookup(ce2,
 211                                         be16_to_cpu(ip[i]));
 212                                 if (!ce1)
 213                                         break;
 214                                 i++;
 215                                 ce2 = ce1;
 216                         }
 217                         cc = ce2[0];
 218                         if (cc) {
 219                                 ip += i;
 220                                 ustrlen -= i;
 221                                 goto done;
 222                         }
 223                 }
 224 same:
 225                 switch (c0) {
 226                 case 0:
 227                         cc = 0x2400;
 228                         break;
 229                 case '/':
 230                         cc = ':';
 231                         break;
 232                 default:
 233                         cc = c0;
 234                 }
 235 done:
 236                 res = nls->uni2char(cc, op, len);
 237                 if (res < 0) {
 238                         if (res == -ENAMETOOLONG)
 239                                 goto out;
 240                         *op = '?';
 241                         res = 1;
 242                 }
 243                 op += res;
 244                 len -= res;
 245         }
 246         res = 0;
 247 out:
 248         *len_p = (char *)op - astr;
 249         return res;
 250 }
 251 
 252 /*
 253  * Convert one or more ASCII characters into a single unicode character.
 254  * Returns the number of ASCII characters corresponding to the unicode char.
 255  */
 256 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
 257                               wchar_t *uc)
 258 {
 259         int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
 260         if (size <= 0) {
 261                 *uc = '?';
 262                 size = 1;
 263         }
 264         switch (*uc) {
 265         case 0x2400:
 266                 *uc = 0;
 267                 break;
 268         case ':':
 269                 *uc = '/';
 270                 break;
 271         }
 272         return size;
 273 }
 274 
 275 /* Decomposes a non-Hangul unicode character. */
 276 static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
 277 {
 278         int off;
 279 
 280         off = hfsplus_decompose_table[(uc >> 12) & 0xf];
 281         if (off == 0 || off == 0xffff)
 282                 return NULL;
 283 
 284         off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
 285         if (!off)
 286                 return NULL;
 287 
 288         off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
 289         if (!off)
 290                 return NULL;
 291 
 292         off = hfsplus_decompose_table[off + (uc & 0xf)];
 293         *size = off & 3;
 294         if (*size == 0)
 295                 return NULL;
 296         return hfsplus_decompose_table + (off / 4);
 297 }
 298 
 299 /*
 300  * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
 301  * precomposed Hangul, otherwise return the length of the decomposition.
 302  *
 303  * This function was adapted from sample code from the Unicode Standard
 304  * Annex #15: Unicode Normalization Forms, version 3.2.0.
 305  *
 306  * Copyright (C) 1991-2018 Unicode, Inc.  All rights reserved.  Distributed
 307  * under the Terms of Use in http://www.unicode.org/copyright.html.
 308  */
 309 static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
 310 {
 311         int index;
 312         int l, v, t;
 313 
 314         index = uc - Hangul_SBase;
 315         if (index < 0 || index >= Hangul_SCount)
 316                 return 0;
 317 
 318         l = Hangul_LBase + index / Hangul_NCount;
 319         v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
 320         t = Hangul_TBase + index % Hangul_TCount;
 321 
 322         result[0] = l;
 323         result[1] = v;
 324         if (t != Hangul_TBase) {
 325                 result[2] = t;
 326                 return 3;
 327         }
 328         return 2;
 329 }
 330 
 331 /* Decomposes a single unicode character. */
 332 static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
 333 {
 334         u16 *result;
 335 
 336         /* Hangul is handled separately */
 337         result = hangul_buffer;
 338         *size = hfsplus_try_decompose_hangul(uc, result);
 339         if (*size == 0)
 340                 result = hfsplus_decompose_nonhangul(uc, size);
 341         return result;
 342 }
 343 
 344 int hfsplus_asc2uni(struct super_block *sb,
 345                     struct hfsplus_unistr *ustr, int max_unistr_len,
 346                     const char *astr, int len)
 347 {
 348         int size, dsize, decompose;
 349         u16 *dstr, outlen = 0;
 350         wchar_t c;
 351         u16 dhangul[3];
 352 
 353         decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
 354         while (outlen < max_unistr_len && len > 0) {
 355                 size = asc2unichar(sb, astr, len, &c);
 356 
 357                 if (decompose)
 358                         dstr = decompose_unichar(c, &dsize, dhangul);
 359                 else
 360                         dstr = NULL;
 361                 if (dstr) {
 362                         if (outlen + dsize > max_unistr_len)
 363                                 break;
 364                         do {
 365                                 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
 366                         } while (--dsize > 0);
 367                 } else
 368                         ustr->unicode[outlen++] = cpu_to_be16(c);
 369 
 370                 astr += size;
 371                 len -= size;
 372         }
 373         ustr->length = cpu_to_be16(outlen);
 374         if (len > 0)
 375                 return -ENAMETOOLONG;
 376         return 0;
 377 }
 378 
 379 /*
 380  * Hash a string to an integer as appropriate for the HFS+ filesystem.
 381  * Composed unicode characters are decomposed and case-folding is performed
 382  * if the appropriate bits are (un)set on the superblock.
 383  */
 384 int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
 385 {
 386         struct super_block *sb = dentry->d_sb;
 387         const char *astr;
 388         const u16 *dstr;
 389         int casefold, decompose, size, len;
 390         unsigned long hash;
 391         wchar_t c;
 392         u16 c2;
 393         u16 dhangul[3];
 394 
 395         casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
 396         decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
 397         hash = init_name_hash(dentry);
 398         astr = str->name;
 399         len = str->len;
 400         while (len > 0) {
 401                 int uninitialized_var(dsize);
 402                 size = asc2unichar(sb, astr, len, &c);
 403                 astr += size;
 404                 len -= size;
 405 
 406                 if (decompose)
 407                         dstr = decompose_unichar(c, &dsize, dhangul);
 408                 else
 409                         dstr = NULL;
 410                 if (dstr) {
 411                         do {
 412                                 c2 = *dstr++;
 413                                 if (casefold)
 414                                         c2 = case_fold(c2);
 415                                 if (!casefold || c2)
 416                                         hash = partial_name_hash(c2, hash);
 417                         } while (--dsize > 0);
 418                 } else {
 419                         c2 = c;
 420                         if (casefold)
 421                                 c2 = case_fold(c2);
 422                         if (!casefold || c2)
 423                                 hash = partial_name_hash(c2, hash);
 424                 }
 425         }
 426         str->hash = end_name_hash(hash);
 427 
 428         return 0;
 429 }
 430 
 431 /*
 432  * Compare strings with HFS+ filename ordering.
 433  * Composed unicode characters are decomposed and case-folding is performed
 434  * if the appropriate bits are (un)set on the superblock.
 435  */
 436 int hfsplus_compare_dentry(const struct dentry *dentry,
 437                 unsigned int len, const char *str, const struct qstr *name)
 438 {
 439         struct super_block *sb = dentry->d_sb;
 440         int casefold, decompose, size;
 441         int dsize1, dsize2, len1, len2;
 442         const u16 *dstr1, *dstr2;
 443         const char *astr1, *astr2;
 444         u16 c1, c2;
 445         wchar_t c;
 446         u16 dhangul_1[3], dhangul_2[3];
 447 
 448         casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
 449         decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
 450         astr1 = str;
 451         len1 = len;
 452         astr2 = name->name;
 453         len2 = name->len;
 454         dsize1 = dsize2 = 0;
 455         dstr1 = dstr2 = NULL;
 456 
 457         while (len1 > 0 && len2 > 0) {
 458                 if (!dsize1) {
 459                         size = asc2unichar(sb, astr1, len1, &c);
 460                         astr1 += size;
 461                         len1 -= size;
 462 
 463                         if (decompose)
 464                                 dstr1 = decompose_unichar(c, &dsize1,
 465                                                           dhangul_1);
 466                         if (!decompose || !dstr1) {
 467                                 c1 = c;
 468                                 dstr1 = &c1;
 469                                 dsize1 = 1;
 470                         }
 471                 }
 472 
 473                 if (!dsize2) {
 474                         size = asc2unichar(sb, astr2, len2, &c);
 475                         astr2 += size;
 476                         len2 -= size;
 477 
 478                         if (decompose)
 479                                 dstr2 = decompose_unichar(c, &dsize2,
 480                                                           dhangul_2);
 481                         if (!decompose || !dstr2) {
 482                                 c2 = c;
 483                                 dstr2 = &c2;
 484                                 dsize2 = 1;
 485                         }
 486                 }
 487 
 488                 c1 = *dstr1;
 489                 c2 = *dstr2;
 490                 if (casefold) {
 491                         c1 = case_fold(c1);
 492                         if (!c1) {
 493                                 dstr1++;
 494                                 dsize1--;
 495                                 continue;
 496                         }
 497                         c2 = case_fold(c2);
 498                         if (!c2) {
 499                                 dstr2++;
 500                                 dsize2--;
 501                                 continue;
 502                         }
 503                 }
 504                 if (c1 < c2)
 505                         return -1;
 506                 else if (c1 > c2)
 507                         return 1;
 508 
 509                 dstr1++;
 510                 dsize1--;
 511                 dstr2++;
 512                 dsize2--;
 513         }
 514 
 515         if (len1 < len2)
 516                 return -1;
 517         if (len1 > len2)
 518                 return 1;
 519         return 0;
 520 }

/* [<][>][^][v][top][bottom][index][help] */