Chilkat HOME Android™ AutoIt C C# C++ Chilkat2-Python CkPython Classic ASP DataFlex Delphi DLL Go Java Node.js Objective-C PHP Extension Perl PowerBuilder PowerShell PureBasic Ruby SQL Server Swift Tcl Unicode C Unicode C++ VB.NET VBScript Visual Basic 6.0 Visual FoxPro Xojo Plugin
(Unicode C++) Load Text File to String using any Code Page
Demonstrates how to load a file that contains text in any of the following binary representations (i.e. "charsets", "character encodings", or "code pages"). This list shows each supported charset name followed by its integer code page value: us-ascii 20127 utf-8 65001 utf8 65001 utf-16 1200 utf16 1200 ucs-2 1200 ucs2 1200 unicode 1200 unicodefffe 1201 utf-7 65000 utf-32 65005 utf-32be 65006 windows-1250 1250 windows-1251 1251 windows-1252 1252 windows-1253 1253 windows-1254 1254 windows-1255 1255 windows-1256 1256 windows-1257 1257 windows-1258 1258 windows-874 874 iso-8859-1 28591 iso-8859-2 28592 iso-8859-3 28593 iso-8859-4 28594 iso-8859-5 28595 iso-8859-6 28596 iso-8859-7 28597 iso-8859-8 28598 iso-8859-9 28599 iso-8859-11 874 iso-8859-13 28603 iso-8859-15 28605 iso-8859-8-i 38598 iso-2022-jp 50220 iso-2022-kr 50225 big5 950 gb2312 936 ks-c-5601-1987 949 shift_jis 932 koi8-r 20866 koi8-u 21866 koi 20866 koi8 20866 koi8-ru 21866 koi8r 20866 unicode-1-1-utf-7 65000 unicode-1-1-utf-8 65001 unicode-2-0-utf-7 65000 unicode-2-0-utf-8 65001 us 20127 utf-16be 1201 utf-16le 1200 utf16be 1201 utf16le 1200 iso-10646-ucs-2 1200 iso-2022-jpeuc 51932 iso-2022-kr-7 50225 iso-2022-kr-7bit 50225 iso-2022-kr-8 51949 iso-2022-kr-8bit 51949 iso-8859-8 visual 28598 iso-ir-100 28591 iso-ir-101 28592 iso-ir-109 28593 iso-ir-110 28594 iso-ir-126 28597 iso-ir-127 28596 iso-ir-138 28598 iso-ir-144 28595 iso-ir-148 28599 iso-ir-149 949 iso-ir-58 936 iso-ir-6 20127 iso646-us 20127 iso-646.irv:1991 20127 iso-8859-1:1987 28591 iso-8859-2:1987 28592 iso-8859-3:1988 28593 iso-8859-4:1988 28594 iso-8859-5:1988 28595 iso-8859-6:1987 28596 iso-8859-7:1987 28597 iso-8859-8:1988 28598 iso-8859-9:1989 28599 iso-2022-jp-1 50221 iso-2022-jp-2 50222 ibm037 37 ibm437 437 ibm500 500 ibm737 737 ibm775 775 ibm850 850 ibm852 852 ibm855 855 ibm857 857 ibm00858 858 ibm860 860 ibm861 861 ibm863 863 ibm864 864 ibm865 865 ibm869 869 ibm870 870 ibm1026 
1026 ibm01140 1140 ibm01141 1141 ibm01142 1142 ibm01143 1143 ibm01144 1144 ibm01145 1145 ibm01146 1146 ibm01147 1147 ibm01148 1148 ibm01149 1149 ibm273 20273 ibm277 20277 ibm278 20278 ibm280 20280 ibm284 20284 ibm285 20285 ibm290 20290 ibm297 20297 ibm420 20420 ibm423 20423 ibm424 20424 ibm871 20871 ibm880 20880 ibm905 20905 ibm00924 20924 ibm-thai 20838 ibm01047 1047 ibm037 37 ibm1026 1026 ibm367 20127 ibm437 437 ibm500 500 ibm737 737 ibm775 775 ibm819 28591 ibm862 862 ibm866 866 irv 20105 x-chinese-cns 20000 x-cp20001 20001 x-chinese-eten 20002 x-cp20003 20003 x-cp20004 20004 x-cp20005 20005 x-cp20261 20261 x-cp20269 20269 x-cp20936 20936 x-cp20949 20949 x-cp50227 50227 x-cp1250 1250 x-cp1251 1251 x-cp21027 21027 x-cp50227 50227 x-mac-japanese 10001 x-mac-chinesetrad 10002 x-mac-korean 10003 x-mac-arabic 10004 x-mac-hebrew 10005 x-mac-greek 10006 x-mac-cyrillic 10007 x-mac-chinesesimp 10008 x-mac-romanian 10010 x-mac-ukrainian 10017 x-mac-thai 10021 x-mac-ce 10029 x-mac-icelandic 10079 x-mac-turkish 10081 x-mac-croatian 10082 x-ia5 20105 x-ia5-german 20106 x-ia5-swedish 20107 x-ia5-norwegian 20108 x-iscii-de 57002 x-iscii-be 57003 x-iscii-ta 57004 x-iscii-te 57005 x-iscii-as 57006 x-iscii-or 57007 x-iscii-ka 57008 x-iscii-ma 57009 x-iscii-gu 57010 x-iscii-pa 57011 x-ansi 1252 x-ebcdic-koreanextended 20833 x-ebcdic-japaneseanduscanada 50931 x-ebcdic-koreanextended 20833 x-euc 51932 x-euc-cn 51936 x-euc-jp 51932 x-europa 29001 x-ms-cp932 932 x-sjis 932 x-unicode-1-1-utf-7 65000 x-unicode-1-1-utf-8 65001 x-unicode-2-0-utf-7 65000 x-unicode-2-0-utf-8 65001 x-user-defined 50000 x-x-big5 950 big5-hkscs 950 cp1025 21025 cp1026 1026 cp1250 1250 cp1251 1251 cp1252 1252 cp1253 1253 cp1254 1254 cp1255 1255 cp1256 1256 cp1257 1257 cp1258 1258 cp866 866 cp875 875 cp00858 858 cp00924 20924 cp01140 1140 cp01141 1141 cp01142 1142 cp01143 1143 cp01144 1144 cp01145 1145 cp01146 1146 cp01147 1147 cp01148 1148 cp01149 1149 cp037 37 cp273 20273 cp278 20278 cp280 20280 cp284 20284 
cp285 20285 cp290 20290 cp297 20297 cp367 20127 cp420 20420 cp423 20423 cp424 20424 cp437 437 cp500 500 cp50227 50227 cp819 28591 cp850 850 cp852 852 cp855 855 cp857 857 cp858 858 cp860 860 cp861 861 cp862 862 cp863 863 cp864 864 cp865 865 cp869 869 cp870 870 cp871 20871 cp880 20880 cp905 20905 cp930 50930 cp933 50933 cp935 50935 cp937 50937 cp939 50939 csiso2022jp 50220 csascii 20127 csbig5 950 cseuckr 51949 cseucpkdfmtjapanese 51932 csgb2312 936 csgb231280 936 csibm037 37 csibm1026 1026 csibm273 20273 csibm277 20277 csibm278 20278 csibm280 20280 csibm284 20284 csibm285 20285 csibm290 20290 csibm297 20297 csibm420 20420 csibm423 20423 csibm424 20424 csibm500 500 csibm870 870 csibm871 20871 csibm880 20880 csibm905 20905 csibmthai 20838 csiso2022kr 50225 csiso58gb231280 936 csisolatin1 28591 csisolatin2 28592 csisolatin3 28593 csisolatin4 28594 csisolatin5 28599 csisolatin9 28605 csisolatinarabic 28596 csisolatincyrillic 28595 csisolatingreek 28597 csisolatinhebrew 28598 cskoi8r 20866 csksc56011987 949 cspc8codepage437 437 csshiftjis 932 csunicode11utf7 65000 cswindows31j 932 ccsid00858 858 ccsid00924 20924 ccsid01140 1140 ccsid01141 1141 ccsid01142 1142 ccsid01143 1143 ccsid01144 1144 ccsid01145 1145 ccsid01146 1146 ccsid01147 1147 ccsid01148 1148 ccsid01149 1149 chinese 936 cn-big5 950 cn-gb 936 cyrillic 28595 dos-720 720 dos-862 862 din-66003 20106 dos-874 874 ebcdic 37 euc-jp 51932 euc-cn 51936 euc-kr 51949 euc-tw 51950 ebcdic-cp-ar1 20420 ebcdic-cp-be 500 ebcdic-cp-ca 37 ebcdic-cp-ch 500 ebcdic-cp-dk 20277 ebcdic-cp-es 20284 ebcdic-cp-fi 20278 ebcdic-cp-fr 20297 ebcdic-cp-gb 20285 ebcdic-cp-gr 20423 ebcdic-cp-he 20424 ebcdic-cp-is 20871 ebcdic-cp-it 20280 ebcdic-cp-nl 37 ebcdic-cp-no 20277 ebcdic-cp-roece 870 ebcdic-cp-se 20278 ebcdic-cp-tr 20905 ebcdic-cp-us 37 ebcdic-cp-wt 37 ebcdic-cp-yu 870 ebcdic-cyrillic 20880 ebcdic-de-273+euro 1141 ebcdic-dk-277+euro 1142 ebcdic-es-284+euro 1145 ebcdic-fi-278+euro 1143 ebcdic-fr-297+euro 1147 ebcdic-gb-285+euro 1146 
ebcdic-is-871+euro 1149 ebcdic-it-280+euro 1144 ebcdic-jp-kana 20290 ebcdic-latin9--euro 20924 ebcdic-no-277+euro 1142 ebcdic-se-278+euro 1143 ebcdic-us-37+euro 1140 ecma-114 28596 ecma-118 28597 elot-928 28597 gb18030 936 gb18030-2000 936 gb2312-80 936 gbk 936 gb-2312-80 936 german 20106 greek 28597 greek8 28597 hz-gb-2312 52936 hebrew 28598 asmo-708 708 ansi-x3.4-1968 20127 ansi-x3.4-1986 20127 arabic 28596 ascii 20127 asmo-449 709 asmo-449+ 709 johab 1361 korean 949 ks-c-5601 949 ks-c5601 949 ksc5601 949 ksc-5601 949 ks-c-5601 949 ks-c-5601-1989 949 l1 28591 l2 28592 l3 28593 l4 28594 l5 28599 l9 28605 latin1 28591 latin2 28592 latin3 28593 latin4 28594 latin5 28599 latin-1 28591 latin-2 28592 latin-3 28593 latin-4 28594 latin-5 28599 latin9 28605 latin-9 28605 logical 28598 macintosh 10000 macroman 10000 macjapanese 10001 macchinesetrad 10002 mackorean 10003 macarabic 10004 machebrew 10005 macgreek 10006 maccyrillic 10007 macchinesesimp 10008 maccentraleurope 10029 macicelandic 10079 macturkish 10081 ms-kanji 932 norwegian 20108 ns-4551-1 20108 shiftjis 932 shift-jis 932 sen-850200-b 20107 sjis 932 swedish 20107 tis-620 874 visual 28598
#include <wchar.h>

#include <CkGlobalW.h>
#include <CkStringBuilderW.h>

// Loads the text file "something.txt", which is stored in the windows-1251
// charset, into a CkStringBuilderW and retrieves its contents as a wide
// string. Prints a message and returns early if the file cannot be loaded.
void ChilkatSample(void)
    {
    // For programming languages where strings are byte sequences, indicate
    // that we wish for all strings to be passed to Chilkat and returned from Chilkat
    // as utf-8 (instead of ANSI).
    // This does not apply to programming languages where strings are objects or
    // already Unicode/utf-8, such as C#, VB.NET, Swift, Foxpro, Java, etc.
    CkGlobalW glob;
    glob.put_DefaultUtf8(true);

    CkStringBuilderW sbText;

    // Simply indicate the name of the charset when loading:
    bool success = sbText.LoadFile(L"something.txt",L"windows-1251");
    if (!success) {
        wprintf(L"Failed to load the file.\n");
        return;
        }

    // Get the text as a string. Chilkat already converted from the charset
    // specified in the call to LoadFile. (Internally, Chilkat always holds
    // strings as utf-8.)
    // NOTE(review): `s` is not used further in this sample; it only shows
    // how to obtain the loaded text.
    const wchar_t *s = sbText.getAsString();
    }
© 2000-2025 Chilkat Software, Inc. All Rights Reserved.