1/* 2 * cifs_unicode: Unicode kernel case support 3 * 4 * Function: 5 * Convert a unicode character to upper or lower case using 6 * compressed tables. 7 * 8 * Copyright (c) International Business Machines Corp., 2000,2009 9 * 10 * This program is free software; you can redistribute it and/or modify 11 * it under the terms of the GNU General Public License as published by 12 * the Free Software Foundation; either version 2 of the License, or 13 * (at your option) any later version. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 18 * the GNU General Public License for more details. 19 * 20 * You should have received a copy of the GNU General Public License 21 * along with this program; if not, write to the Free Software 22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23 * 24 * 25 * Notes: 26 * These APIs are based on the C library functions. The semantics 27 * should match the C functions but with expanded size operands. 28 * 29 * The upper/lower functions are based on a table created by mkupr. 30 * This is a compressed table of upper and lower case conversion. 31 * 32 */ 33#ifndef _CIFS_UNICODE_H 34#define _CIFS_UNICODE_H 35 36#include <asm/byteorder.h> 37#include <linux/types.h> 38#include <linux/nls.h> 39 40#define UNIUPR_NOLOWER /* Example to not expand lower case tables */ 41 42/* 43 * Windows maps these to the user defined 16 bit Unicode range since they are 44 * reserved symbols (along with \ and /), otherwise illegal to store 45 * in filenames in NTFS 46 */ 47#define UNI_ASTERISK (__u16) ('*' + 0xF000) 48#define UNI_QUESTION (__u16) ('?' + 0xF000) 49#define UNI_COLON (__u16) (':' + 0xF000) 50#define UNI_GRTRTHAN (__u16) ('>' + 0xF000) 51#define UNI_LESSTHAN (__u16) ('<' + 0xF000) 52#define UNI_PIPE (__u16) ('|' + 0xF000) 53#define UNI_SLASH (__u16) ('\\' + 0xF000) 54 55/* 56 * Macs use an older "SFM" mapping of the symbols above. Fortunately it does 57 * not conflict (although almost does) with the mapping above. 58 */ 59 60#define SFM_ASTERISK ((__u16) 0xF021) 61#define SFM_QUESTION ((__u16) 0xF025) 62#define SFM_COLON ((__u16) 0xF022) 63#define SFM_GRTRTHAN ((__u16) 0xF024) 64#define SFM_LESSTHAN ((__u16) 0xF023) 65#define SFM_PIPE ((__u16) 0xF027) 66#define SFM_SLASH ((__u16) 0xF026) 67 68/* 69 * Mapping mechanism to use when one of the seven reserved characters is 70 * encountered. We can only map using one of the mechanisms at a time 71 * since otherwise readdir could return directory entries which we would 72 * not be able to open 73 * 74 * NO_MAP_UNI_RSVD = do not perform any remapping of the character 75 * SFM_MAP_UNI_RSVD = map reserved characters using SFM scheme (MAC compatible) 76 * SFU_MAP_UNI_RSVD = map reserved characters ala SFU ("mapchars" option) 77 * 78 */ 79#define NO_MAP_UNI_RSVD 0 80#define SFM_MAP_UNI_RSVD 1 81#define SFU_MAP_UNI_RSVD 2 82 83/* Just define what we want from uniupr.h. We don't want to define the tables 84 * in each source file. 85 */ 86#ifndef UNICASERANGE_DEFINED 87struct UniCaseRange { 88 wchar_t start; 89 wchar_t end; 90 signed char *table; 91}; 92#endif /* UNICASERANGE_DEFINED */ 93 94#ifndef UNIUPR_NOUPPER 95extern signed char CifsUniUpperTable[512]; 96extern const struct UniCaseRange CifsUniUpperRange[]; 97#endif /* UNIUPR_NOUPPER */ 98 99#ifndef UNIUPR_NOLOWER 100extern signed char CifsUniLowerTable[512]; 101extern const struct UniCaseRange CifsUniLowerRange[]; 102#endif /* UNIUPR_NOLOWER */ 103 104#ifdef __KERNEL__ 105int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, 106 const struct nls_table *cp, int map_type); 107int cifs_utf16_bytes(const __le16 *from, int maxbytes, 108 const struct nls_table *codepage); 109int cifs_strtoUTF16(__le16 *, const char *, int, const struct nls_table *); 110char *cifs_strndup_from_utf16(const char *src, const int maxlen, 111 const bool is_unicode, 112 const struct nls_table *codepage); 113extern int cifsConvertToUTF16(__le16 *target, const char *source, int maxlen, 114 const struct nls_table *cp, int mapChars); 115extern int cifs_remap(struct cifs_sb_info *cifs_sb); 116#ifdef CONFIG_CIFS_SMB2 117extern __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen, 118 int *utf16_len, const struct nls_table *cp, 119 int remap); 120#endif /* CONFIG_CIFS_SMB2 */ 121#endif 122 123wchar_t cifs_toupper(wchar_t in); 124 125/* 126 * UniStrcat: Concatenate the second string to the first 127 * 128 * Returns: 129 * Address of the first string 130 */ 131static inline wchar_t * 132UniStrcat(wchar_t *ucs1, const wchar_t *ucs2) 133{ 134 wchar_t *anchor = ucs1; /* save a pointer to start of ucs1 */ 135 136 while (*ucs1++) ; /* To end of first string */ 137 ucs1--; /* Return to the null */ 138 while ((*ucs1++ = *ucs2++)) ; /* copy string 2 over */ 139 return anchor; 140} 141 142/* 143 * UniStrchr: Find a character in a string 144 * 145 * Returns: 146 * Address of first occurrence of character in string 147 * or NULL if the character is not in the string 148 */ 149static inline wchar_t * 150UniStrchr(const wchar_t *ucs, wchar_t uc) 151{ 152 while ((*ucs != uc) && *ucs) 153 ucs++; 154 155 if (*ucs == uc) 156 return (wchar_t *) ucs; 157 return NULL; 158} 159 160/* 161 * UniStrcmp: Compare two strings 162 * 163 * Returns: 164 * < 0: First string is less than second 165 * = 0: Strings are equal 166 * > 0: First string is greater than second 167 */ 168static inline int 169UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2) 170{ 171 while ((*ucs1 == *ucs2) && *ucs1) { 172 ucs1++; 173 ucs2++; 174 } 175 return (int) *ucs1 - (int) *ucs2; 176} 177 178/* 179 * UniStrcpy: Copy a string 180 */ 181static inline wchar_t * 182UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2) 183{ 184 wchar_t *anchor = ucs1; /* save the start of result string */ 185 186 while ((*ucs1++ = *ucs2++)) ; 187 return anchor; 188} 189 190/* 191 * UniStrlen: Return the length of a string (in 16 bit Unicode chars not bytes) 192 */ 193static inline size_t 194UniStrlen(const wchar_t *ucs1) 195{ 196 int i = 0; 197 198 while (*ucs1++) 199 i++; 200 return i; 201} 202 203/* 204 * UniStrnlen: Return the length (in 16 bit Unicode chars not bytes) of a 205 * string (length limited) 206 */ 207static inline size_t 208UniStrnlen(const wchar_t *ucs1, int maxlen) 209{ 210 int i = 0; 211 212 while (*ucs1++) { 213 i++; 214 if (i >= maxlen) 215 break; 216 } 217 return i; 218} 219 220/* 221 * UniStrncat: Concatenate length limited string 222 */ 223static inline wchar_t * 224UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n) 225{ 226 wchar_t *anchor = ucs1; /* save pointer to string 1 */ 227 228 while (*ucs1++) ; 229 ucs1--; /* point to null terminator of s1 */ 230 while (n-- && (*ucs1 = *ucs2)) { /* copy s2 after s1 */ 231 ucs1++; 232 ucs2++; 233 } 234 *ucs1 = 0; /* Null terminate the result */ 235 return (anchor); 236} 237 238/* 239 * UniStrncmp: Compare length limited string 240 */ 241static inline int 242UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n) 243{ 244 if (!n) 245 return 0; /* Null strings are equal */ 246 while ((*ucs1 == *ucs2) && *ucs1 && --n) { 247 ucs1++; 248 ucs2++; 249 } 250 return (int) *ucs1 - (int) *ucs2; 251} 252 253/* 254 * UniStrncmp_le: Compare length limited string - native to little-endian 255 */ 256static inline int 257UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n) 258{ 259 if (!n) 260 return 0; /* Null strings are equal */ 261 while ((*ucs1 == __le16_to_cpu(*ucs2)) && *ucs1 && --n) { 262 ucs1++; 263 ucs2++; 264 } 265 return (int) *ucs1 - (int) __le16_to_cpu(*ucs2); 266} 267 268/* 269 * UniStrncpy: Copy length limited string with pad 270 */ 271static inline wchar_t * 272UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n) 273{ 274 wchar_t *anchor = ucs1; 275 276 while (n-- && *ucs2) /* Copy the strings */ 277 *ucs1++ = *ucs2++; 278 279 n++; 280 while (n--) /* Pad with nulls */ 281 *ucs1++ = 0; 282 return anchor; 283} 284 285/* 286 * UniStrncpy_le: Copy length limited string with pad to little-endian 287 */ 288static inline wchar_t * 289UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n) 290{ 291 wchar_t *anchor = ucs1; 292 293 while (n-- && *ucs2) /* Copy the strings */ 294 *ucs1++ = __le16_to_cpu(*ucs2++); 295 296 n++; 297 while (n--) /* Pad with nulls */ 298 *ucs1++ = 0; 299 return anchor; 300} 301 302/* 303 * UniStrstr: Find a string in a string 304 * 305 * Returns: 306 * Address of first match found 307 * NULL if no matching string is found 308 */ 309static inline wchar_t * 310UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2) 311{ 312 const wchar_t *anchor1 = ucs1; 313 const wchar_t *anchor2 = ucs2; 314 315 while (*ucs1) { 316 if (*ucs1 == *ucs2) { 317 /* Partial match found */ 318 ucs1++; 319 ucs2++; 320 } else { 321 if (!*ucs2) /* Match found */ 322 return (wchar_t *) anchor1; 323 ucs1 = ++anchor1; /* No match */ 324 ucs2 = anchor2; 325 } 326 } 327 328 if (!*ucs2) /* Both end together */ 329 return (wchar_t *) anchor1; /* Match found */ 330 return NULL; /* No match */ 331} 332 333#ifndef UNIUPR_NOUPPER 334/* 335 * UniToupper: Convert a unicode character to upper case 336 */ 337static inline wchar_t 338UniToupper(register wchar_t uc) 339{ 340 register const struct UniCaseRange *rp; 341 342 if (uc < sizeof(CifsUniUpperTable)) { 343 /* Latin characters */ 344 return uc + CifsUniUpperTable[uc]; /* Use base tables */ 345 } else { 346 rp = CifsUniUpperRange; /* Use range tables */ 347 while (rp->start) { 348 if (uc < rp->start) /* Before start of range */ 349 return uc; /* Uppercase = input */ 350 if (uc <= rp->end) /* In range */ 351 return uc + rp->table[uc - rp->start]; 352 rp++; /* Try next range */ 353 } 354 } 355 return uc; /* Past last range */ 356} 357 358/* 359 * UniStrupr: Upper case a unicode string 360 */ 361static inline __le16 * 362UniStrupr(register __le16 *upin) 363{ 364 register __le16 *up; 365 366 up = upin; 367 while (*up) { /* For all characters */ 368 *up = cpu_to_le16(UniToupper(le16_to_cpu(*up))); 369 up++; 370 } 371 return upin; /* Return input pointer */ 372} 373#endif /* UNIUPR_NOUPPER */ 374 375#ifndef UNIUPR_NOLOWER 376/* 377 * UniTolower: Convert a unicode character to lower case 378 */ 379static inline wchar_t 380UniTolower(register wchar_t uc) 381{ 382 register const struct UniCaseRange *rp; 383 384 if (uc < sizeof(CifsUniLowerTable)) { 385 /* Latin characters */ 386 return uc + CifsUniLowerTable[uc]; /* Use base tables */ 387 } else { 388 rp = CifsUniLowerRange; /* Use range tables */ 389 while (rp->start) { 390 if (uc < rp->start) /* Before start of range */ 391 return uc; /* Uppercase = input */ 392 if (uc <= rp->end) /* In range */ 393 return uc + rp->table[uc - rp->start]; 394 rp++; /* Try next range */ 395 } 396 } 397 return uc; /* Past last range */ 398} 399 400/* 401 * UniStrlwr: Lower case a unicode string 402 */ 403static inline wchar_t * 404UniStrlwr(register wchar_t *upin) 405{ 406 register wchar_t *up; 407 408 up = upin; 409 while (*up) { /* For all characters */ 410 *up = UniTolower(*up); 411 up++; 412 } 413 return upin; /* Return input pointer */ 414} 415 416#endif 417 418#endif /* _CIFS_UNICODE_H */ 419