diff -dPNur taglib-1.8/config-taglib.h.cmake taglib-1.8-ds/config-taglib.h.cmake --- taglib-1.8/config-taglib.h.cmake 2012-09-06 20:03:15.000000000 +0200 +++ taglib-1.8-ds/config-taglib.h.cmake 2013-05-22 20:13:15.000000000 +0200 @@ -3,6 +3,8 @@ /* Define if you have libz */ #cmakedefine HAVE_ZLIB 1 +#cmakedefine HAVE_LIBRCC 1 + #cmakedefine NO_ITUNES_HACKS 1 #cmakedefine WITH_ASF 1 #cmakedefine WITH_MP4 1 diff -dPNur taglib-1.8/ConfigureChecks.cmake taglib-1.8-ds/ConfigureChecks.cmake --- taglib-1.8/ConfigureChecks.cmake 2012-09-06 20:03:15.000000000 +0200 +++ taglib-1.8-ds/ConfigureChecks.cmake 2013-05-22 20:13:15.000000000 +0200 @@ -14,6 +14,8 @@ set(HAVE_ZLIB 0) endif() +SET(HAVE_LIBRCC 1) + set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules) find_package(CppUnit) if(NOT CppUnit_FOUND AND BUILD_TESTS) diff -dPNur taglib-1.8/examples/tagreader_c.c taglib-1.8-ds/examples/tagreader_c.c --- taglib-1.8/examples/tagreader_c.c 2012-09-06 20:03:15.000000000 +0200 +++ taglib-1.8-ds/examples/tagreader_c.c 2013-05-22 20:13:15.000000000 +0200 @@ -38,7 +38,7 @@ TagLib_Tag *tag; const TagLib_AudioProperties *properties; - taglib_set_strings_unicode(FALSE); +// taglib_set_strings_unicode(FALSE); for(i = 1; i < argc; i++) { printf("******************** \"%s\" ********************\n", argv[i]); diff -dPNur taglib-1.8/examples/tagwriter.cpp taglib-1.8-ds/examples/tagwriter.cpp --- taglib-1.8/examples/tagwriter.cpp 2012-09-06 20:03:15.000000000 +0200 +++ taglib-1.8-ds/examples/tagwriter.cpp 2013-05-22 20:13:15.000000000 +0200 @@ -92,7 +92,7 @@ if(isArgument(argv[i]) && i + 1 < argc && !isArgument(argv[i + 1])) { char field = argv[i][1]; - TagLib::String value = argv[i + 1]; + TagLib::String value(argv[i + 1], TagLib::String::Locale); TagLib::List::Iterator it; for(it = fileList.begin(); it != fileList.end(); ++it) { diff -dPNur taglib-1.8/taglib/CMakeLists.txt taglib-1.8-ds/taglib/CMakeLists.txt --- taglib-1.8/taglib/CMakeLists.txt 2012-09-06 20:03:15.000000000 +0200 +++ taglib-1.8-ds/taglib/CMakeLists.txt 2013-05-22 20:13:15.000000000 +0200 @@ -35,6 +35,7 @@ audioproperties.h taglib_export.h ${CMAKE_BINARY_DIR}/taglib_config.h + toolkit/rccpatch.h toolkit/taglib.h toolkit/tstring.h toolkit/tlist.h @@ -269,6 +270,7 @@ ) set(toolkit_SRCS + toolkit/rccpatch.cpp toolkit/tstring.cpp toolkit/tstringlist.cpp toolkit/tbytevector.cpp @@ -296,7 +298,7 @@ add_library(tag ${tag_LIB_SRCS} ${tag_HDRS}) if(ZLIB_FOUND) - target_link_libraries(tag ${ZLIB_LIBRARIES}) + target_link_libraries(tag rcc ${ZLIB_LIBRARIES}) endif() set_target_properties(tag PROPERTIES diff -dPNur taglib-1.8/taglib/mpeg/id3v1/id3v1tag.cpp taglib-1.8-ds/taglib/mpeg/id3v1/id3v1tag.cpp --- taglib-1.8/taglib/mpeg/id3v1/id3v1tag.cpp 2012-09-06 20:03:15.000000000 +0200 +++ taglib-1.8-ds/taglib/mpeg/id3v1/id3v1tag.cpp 2013-05-22 20:13:15.000000000 +0200 @@ -64,17 +64,18 @@ String ID3v1::StringHandler::parse(const ByteVector &data) const { - return String(data, String::Latin1).stripWhiteSpace(); + return String(data, String::Latin1ID3).stripWhiteSpace(); } ByteVector ID3v1::StringHandler::render(const String &s) const { if(!s.isLatin1()) { + if (String::ID3WType(String::Latin1) == String::Latin1) return ByteVector(); } - return s.data(String::Latin1); + return s.data(String::Latin1ID3); } //////////////////////////////////////////////////////////////////////////////// @@ -247,7 +248,7 @@ d->track = uchar(data[offset + 29]); } else - d->comment = data.mid(offset, 30); + d->comment = TagPrivate::stringHandler->parse(data.mid(offset, 30)); offset += 30; diff -dPNur taglib-1.8/taglib/mpeg/id3v2/frames/commentsframe.cpp taglib-1.8-ds/taglib/mpeg/id3v2/frames/commentsframe.cpp --- taglib-1.8/taglib/mpeg/id3v2/frames/commentsframe.cpp 2012-09-06 20:03:15.000000000 +0200 +++ taglib-1.8-ds/taglib/mpeg/id3v2/frames/commentsframe.cpp 2013-05-22 20:13:16.000000000 +0200 @@ -150,10 +150,10 @@ return; } - d->textEncoding = String::Type(data[0]); + d->textEncoding = String::ID3Type(data[0]); d->language = data.mid(1, 3); - int byteAlign = d->textEncoding == String::Latin1 || d->textEncoding == String::UTF8 ? 1 : 2; + int byteAlign = (d->textEncoding == String::Latin1 || d->textEncoding == String::Latin1ID3 || d->textEncoding == String::Latin1ID3V2 || d->textEncoding == String::UTF8) ? 1 : 2; ByteVectorList l = ByteVectorList::split(data.mid(4), textDelimiter(d->textEncoding), byteAlign, 2); @@ -174,10 +174,12 @@ String::Type encoding = d->textEncoding; + encoding = String::ID3WType(encoding); + encoding = checkTextEncoding(d->description, encoding); encoding = checkTextEncoding(d->text, encoding); - - v.append(char(encoding)); + + v.append(char(String::ID3RealType(encoding))); v.append(d->language.size() == 3 ? d->language : "XXX"); v.append(d->description.data(encoding)); v.append(textDelimiter(encoding)); diff -dPNur taglib-1.8/taglib/mpeg/id3v2/frames/textidentificationframe.cpp taglib-1.8-ds/taglib/mpeg/id3v2/frames/textidentificationframe.cpp --- taglib-1.8/taglib/mpeg/id3v2/frames/textidentificationframe.cpp 2012-09-06 20:03:15.000000000 +0200 +++ taglib-1.8-ds/taglib/mpeg/id3v2/frames/textidentificationframe.cpp 2013-05-22 20:13:16.000000000 +0200 @@ -187,12 +187,12 @@ // read the string data type (the first byte of the field data) - d->textEncoding = String::Type(data[0]); + d->textEncoding = String::ID3Type(data[0]); // split the byte array into chunks based on the string type (two byte delimiter // for unicode encodings) - int byteAlign = d->textEncoding == String::Latin1 || d->textEncoding == String::UTF8 ? 1 : 2; + int byteAlign = (d->textEncoding == String::Latin1 || d->textEncoding == String::Latin1ID3 || d->textEncoding == String::Latin1ID3V2 || d->textEncoding == String::UTF8) ? 1 : 2; // build a small counter to strip nulls off the end of the field @@ -223,11 +223,14 @@ ByteVector TextIdentificationFrame::renderFields() const { - String::Type encoding = checkTextEncoding(d->fieldList, d->textEncoding); + String::Type encoding = d->textEncoding; + + encoding = String::ID3WType(encoding); + encoding = checkTextEncoding(d->fieldList, encoding); ByteVector v; - v.append(char(encoding)); + v.append(char(String::ID3RealType(encoding))); for(StringList::ConstIterator it = d->fieldList.begin(); it != d->fieldList.end(); it++) { diff -dPNur taglib-1.8/taglib/mpeg/id3v2/id3v2frame.cpp taglib-1.8-ds/taglib/mpeg/id3v2/id3v2frame.cpp --- taglib-1.8/taglib/mpeg/id3v2/id3v2frame.cpp 2012-09-06 20:03:15.000000000 +0200 +++ taglib-1.8-ds/taglib/mpeg/id3v2/id3v2frame.cpp 2013-05-22 20:10:07.000000000 +0200 @@ -295,7 +295,7 @@ if((encoding == String::UTF8 || encoding == String::UTF16BE) && version != 4) return String::UTF16; - if(encoding != String::Latin1) + if((encoding != String::Latin1)&&(encoding != String::Latin1ID3V2)) return encoding; for(StringList::ConstIterator it = fields.begin(); it != fields.end(); ++it) { diff -dPNur taglib-1.8/taglib/toolkit/rccpatch.cpp taglib-1.8-ds/taglib/toolkit/rccpatch.cpp --- taglib-1.8/taglib/toolkit/rccpatch.cpp 1970-01-01 01:00:00.000000000 +0100 +++ taglib-1.8-ds/taglib/toolkit/rccpatch.cpp 2013-05-22 20:13:16.000000000 +0200 @@ -0,0 +1,237 @@ +#include + +#include +#include "tstring.h" +#include "tbytevector.h" + +//#define RCC_DEBUG + + +#ifndef HAVE_LIBRCC +# include +#endif + +#ifdef HAVE_LIBRCC +# ifdef RCC_DEBUG +# include +# endif /* RCC_DEBUG */ +# include +# include +#endif /* HAVE_LIBRCC */ + + +#ifdef HAVE_LIBRCC +# define ID3_CLASS 0 +# define ID3V2_CLASS 1 +# define UTF_CLASS 2 +# define OUT_CLASS 3 +static rcc_class classes[] = { + { "id3", RCC_CLASS_STANDARD, NULL, NULL, "ID3 Encoding", 0 }, + { "id3v2", RCC_CLASS_STANDARD, "id3", NULL, "ID3 v.2 Encoding", 0 }, + { "utf", RCC_CLASS_KNOWN, "UTF-8", NULL, "Unicode Encoding", 0}, + { "out", RCC_CLASS_TRANSLATE_LOCALE, "LC_CTYPE", NULL, "Output Encoding", 0 }, + { NULL, RCC_CLASS_STANDARD, NULL, NULL, NULL, 0 } +}; + +static int rcc_initialized = 0; + +static rcc_context ctx = NULL; +#endif /* HAVE_LIBRCC */ + + +void rccTaglibPatchFree() { +#ifdef HAVE_LIBRCC + if (rcc_initialized) { + rccFree(); + rcc_initialized = 0; + } +#endif /* HAVE_LIBRCC */ +} + +void rccTaglibPatchInit() { +#ifdef HAVE_LIBRCC + if (rcc_initialized) return; + rccInit(); + rccInitDefaultContext(NULL, 0, 0, classes, 0); + rccLoad(NULL, "xmms"); + rccInitDb4(NULL, NULL, 0); + rcc_initialized = 1; +#endif /* HAVE_LIBRCC */ +} + +void rccTaglibPatchSetContext(void *newctx) { +#ifdef HAVE_LIBRCC + if (newctx) { + ctx = (rcc_context)newctx; + rcc_initialized = 1; + } +#endif /* HAVE_LIBRCC */ +} + +static void rccTaglibPatchTryInit() { +#ifdef HAVE_LIBRCC + if (!rcc_initialized) { + rccTaglibPatchInit(); + if (rcc_initialized) atexit(rccTaglibPatchFree); + } +#endif /* HAVE_LIBRCC */ +} + + +TagLib::ByteVector rccTaglibPatchRecodeOutput(const std::string &s) { + TagLib::ByteVector v; +#ifdef HAVE_LIBRCC + size_t rlen; + char *res; + + rccTaglibPatchTryInit(); + + res = rccSizedRecode(ctx, UTF_CLASS, OUT_CLASS, s.c_str(), s.length(), &rlen); +#ifdef RCC_DEBUG + for (const unsigned char *c = (const unsigned char*)s.c_str(); *c; c++) { + if (*c > 127) { + printf(" Output: %s - %s\n", s.c_str(), res?res:"null"); + break; + } + } +#endif /* RCC_DEBUG */ + + if (res) v.setData(res, rlen); + else v.setData("", 0); + //v.setData(s.c_str(), s.length()); + + return v; +#else + v.setData("", 0); + + return v; +#endif /* HAVE_LIBRCC */ +} + +TagLib::ByteVector rccTaglibPatchRecodeOutputID3(const std::string &s, bool v2 = false) { + TagLib::ByteVector v; +#ifdef HAVE_LIBRCC + size_t rlen; + char *res; + + rccTaglibPatchTryInit(); + + res = rccSizedRecode(ctx, UTF_CLASS, v2?ID3V2_CLASS:ID3_CLASS, s.c_str(), s.length(), &rlen); +#ifdef RCC_DEBUG + for (const unsigned char *c = (const unsigned char*)s.c_str(); *c; c++) { + if (*c > 127) { + printf(" OutputID3(%i): %s - %s\n", v2, s.c_str(), res?res:"null"); + break; + } + } +#endif /* RCC_DEBUG */ + + if (res) v.setData(res, rlen); + else v.setData("", 0); + //v.setData(s.c_str(), s.length()); + + return v; +#else + v.setData("", 0); + + return v; +#endif /* HAVE_LIBRCC */ +} + +TagLib::ByteVector rccTaglibPatchRecodeInput(const std::string &s) { + TagLib::ByteVector v; +#ifdef HAVE_LIBRCC + size_t rlen; + char *res; + + rccTaglibPatchTryInit(); + + res = rccSizedRecode(ctx, OUT_CLASS, UTF_CLASS, s.c_str(), s.length(), &rlen); +#ifdef RCC_DEBUG + for (const unsigned char *c = (const unsigned char*)s.c_str(); *c; c++) { + if (*c > 127) { + printf(" Input: %s - %s\n", s.c_str(), res?res:"null"); + break; + } + } +#endif /* RCC_DEBUG */ + + if (res) v.setData(res, rlen); + else +#endif /* HAVE_LIBRCC */ + v.setData("", 0); + + return v; +} + +TagLib::ByteVector rccTaglibPatchRecodeInputID3(const std::string &s, bool v2 = false) { + TagLib::ByteVector v; +#ifdef HAVE_LIBRCC + size_t rlen; + char *res; + + rccTaglibPatchTryInit(); + + res = rccSizedRecode(ctx, v2?ID3V2_CLASS:ID3_CLASS, UTF_CLASS, s.c_str(), s.length(), &rlen); +#ifdef RCC_DEBUG + for (const unsigned char *c = (const unsigned char*)s.c_str(); *c; c++) { + if (*c > 127) { + printf(" InputID3(%i): %s - %s\n", v2, s.c_str(), res?res:"null"); + break; + } + } +#endif /* RCC_DEBUG */ + if (res) v.setData(res, rlen); + else +#endif /* HAVE_LIBRCC */ + v.setData("", 0); + + return v; +} + +TagLib::String::Type rccTaglibPatchGetLocaleType() { +#ifdef HAVE_LIBRCC + size_t len; + char charset[32]; + + rccTaglibPatchTryInit(); + if (!rccLocaleGetCharset(charset, NULL, 31)) { + if (!strncmp(charset, "UTF", 3)) { + len = strlen(charset); + + if (charset[len-1]=='8') return TagLib::String::UTF8; + if (!strcmp(charset+(len-2),"16")) return TagLib::String::UTF16; + if (!strcmp(charset+(len-4),"16LE")) return TagLib::String::UTF16LE; + if (!strcmp(charset+(len-4),"16BE")) return TagLib::String::UTF16BE; + } + return TagLib::String::Latin1; + } +#endif /* HAVE_LIBRCC */ + return TagLib::String::UTF8; +} + +TagLib::String::Type rccTaglibPatchGetID3Type() { +#ifdef HAVE_LIBRCC + size_t len; + const char *charset; + + rccTaglibPatchTryInit(); + + charset = rccGetCurrentCharsetName(ctx, ID3V2_CLASS); + if (charset) { + if (!strncmp(charset, "UTF", 3)) { + len = strlen(charset); + + if (charset[len-1]=='8') return TagLib::String::UTF8; + if (!strcmp(charset+(len-2),"16")) return TagLib::String::UTF16; + if (!strcmp(charset+(len-4),"16LE")) return TagLib::String::UTF16LE; + if (!strcmp(charset+(len-4),"16BE")) return TagLib::String::UTF16BE; + } + return TagLib::String::Latin1ID3V2; + } else { + // Error or no-language configured: If Latin1ID3V2 is returned we normally will use the default unicode encoding unless Latin1 is selected by taglib + return TagLib::String::Latin1ID3V2; + } +#endif /* HAVE_LIBRCC */ + return TagLib::String::Latin1; +} diff -dPNur taglib-1.8/taglib/toolkit/rccpatch.h taglib-1.8-ds/taglib/toolkit/rccpatch.h --- taglib-1.8/taglib/toolkit/rccpatch.h 1970-01-01 01:00:00.000000000 +0100 +++ taglib-1.8-ds/taglib/toolkit/rccpatch.h 2013-05-22 20:13:16.000000000 +0200 @@ -0,0 +1,20 @@ +#ifndef _RCC_PATCH_H +#define _RCC_PATCH_H + +#include +#include "tstring.h" +#include "tbytevector.h" + +void rccTaglibPatchFree(); +void rccTaglibPatchInit(); +void rccTaglibPatchSetContext(void *newctx); + +TagLib::ByteVector rccTaglibPatchRecodeOutput(const std::string &s); +TagLib::ByteVector rccTaglibPatchRecodeInput(const std::string &s); +TagLib::ByteVector rccTaglibPatchRecodeOutputID3(const std::string &s, bool v2 = false); +TagLib::ByteVector rccTaglibPatchRecodeInputID3(const std::string &s, bool v2 = false); + +TagLib::String::Type rccTaglibPatchGetLocaleType(); +TagLib::String::Type rccTaglibPatchGetID3Type(); + +#endif /* _RCC_PATCH_H */ diff -dPNur taglib-1.8/taglib/toolkit/tstring.cpp taglib-1.8-ds/taglib/toolkit/tstring.cpp --- taglib-1.8/taglib/toolkit/tstring.cpp 2012-09-06 20:03:15.000000000 +0200 +++ taglib-1.8-ds/taglib/toolkit/tstring.cpp 2013-05-22 20:13:16.000000000 +0200 @@ -23,6 +23,7 @@ * http://www.mozilla.org/MPL/ * ***************************************************************************/ +#include "rccpatch.h" #include "tstring.h" #include "unicode.h" #include "tdebug.h" @@ -168,7 +169,7 @@ if(v.isEmpty()) return; - if(t == Latin1 || t == UTF8) { + if(t == Latin1 || t == Latin1ID3 || t == Latin1ID3V2 || t == UTF8) { int length = 0; d->data.resize(v.size()); @@ -397,10 +398,38 @@ { ByteVector v; - switch(t) { + if (t == Locale) { + // The source is either Unicode or real Latin1 (if rcc is bypassed) + std::string s = to8Bit(true); + + // In case of UTF8 locale, this probably will return NULL (no recoding needed), but we will take UTF8 path in the next swtich + v = rccTaglibPatchRecodeOutput(s); + if (v.size()) return v; + t = rccTaglibPatchGetLocaleType(); + } + + switch(t) { + case Latin1ID3: + case Latin1ID3V2: + { + std::string s = to8Bit(true); + if (t == Latin1ID3) v = rccTaglibPatchRecodeOutputID3(s, false); + else if (t == Latin1ID3V2) v = rccTaglibPatchRecodeOutputID3(s, true); + if (v.size()) break; + + // we don't know if we got NULL because rcc is disabled (error) or UTF8 output is required + if ((t == Latin1ID3V2)&&(rccTaglibPatchGetID3Type() == UTF8)) { + v.setData(s.c_str(), s.length()); + } else { + for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) + v.append(char(*it)); + } + break; + } case Latin1: { + // We can have the UTF16 inside, but first 256 positions is equal to Latin1 for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) v.append(char(*it)); break; @@ -750,6 +779,34 @@ void String::prepare(Type t) { + if (t == Locale) t = rccTaglibPatchGetLocaleType(); + + if ((t == Latin1)||(t == Latin1ID3)||(t == Latin1ID3V2)) { + std::string s = to8Bit(false); + ByteVector v; + + if (t == Latin1ID3) v = rccTaglibPatchRecodeInputID3(s, false); + else if (t == Latin1ID3V2) v = rccTaglibPatchRecodeInputID3(s, true); + else /* Latin1 converted from Locale */ v = rccTaglibPatchRecodeInput(s); + + if (v.size()) { + int length = 0; + d->data.resize(v.size()); + wstring::iterator targetIt = d->data.begin(); + for(ByteVector::ConstIterator it = v.begin(); it != v.end() && (*it); ++it) { + *targetIt = uchar(*it); + ++targetIt; + ++length; + } + d->data.resize(length); + t = UTF8; + } else { + // We don't know if we got UTF-8 encoded string or either rcc is disable or something is failed, + // since standard applications are really expecting here Latin1, it is safe to just check if we have violations of UTF8 + //if (Unicode::isLegalUTF8(s)) t = UTF8; + } + } + switch(t) { case UTF16: { @@ -839,6 +896,27 @@ std::ostream &operator<<(std::ostream &s, const String &str) { - s << str.to8Bit(); + ByteVector bv = str.data(String::Locale); + s << bv; return s; } + +String::Type String::ID3Type(int i) { + if (i == Latin1) return Latin1ID3V2; + return Type(i); +}; + +String::Type String::ID3WType(Type type) { + Type rcc_type = rccTaglibPatchGetID3Type(); + if ((rcc_type == Latin1ID3)||(rcc_type == Latin1ID3V2)||(rcc_type == Latin1)) { + if (type == Latin1) return rcc_type; + return type; + } + + return rcc_type; +}; + +String::Type String::ID3RealType(Type type) { + if ((type == Latin1ID3)||(type == Latin1ID3V2)) return Latin1; + return type; +} diff -dPNur taglib-1.8/taglib/toolkit/tstring.h taglib-1.8-ds/taglib/toolkit/tstring.h --- taglib-1.8/taglib/toolkit/tstring.h 2012-09-06 20:03:15.000000000 +0200 +++ taglib-1.8-ds/taglib/toolkit/tstring.h 2013-05-22 20:13:16.000000000 +0200 @@ -90,6 +90,18 @@ */ enum Type { /*! + * Determine using current locale settings + */ + Locale = -1, + /*! + * Latin1 for ID3 tags. + */ + Latin1ID3 = 65, + /*! + * Latin1 for ID3 tags. + */ + Latin1ID3V2 = 66, + /*! * IS08859-1, or Latin1 encoding. 8 bit characters. */ Latin1 = 0, @@ -112,6 +124,10 @@ UTF16LE = 4 }; + static Type ID3Type(int i); + static Type ID3WType(Type type); + static Type ID3RealType(Type type); + /*! * Constructs an empty String. */