summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Suren A. Chilingaryan <csa@dside.dyndns.org>, 2005-08-07 21:41:32 +0000
committer: Suren A. Chilingaryan <csa@dside.dyndns.org>, 2005-08-07 21:41:32 +0000
commit: 9d4628df369b92016b7fc3bfc7fed6d06ff2ca9a (patch)
tree: 9d78892cf391d2cdb5da216cfec78a8b565751c7
parent: 94ca629ceec7b0dc9f6f724b2e15923d3ec1d5b3 (diff)
download: librcc-9d4628df369b92016b7fc3bfc7fed6d06ff2ca9a.tar.gz
librcc-9d4628df369b92016b7fc3bfc7fed6d06ff2ca9a.tar.bz2
librcc-9d4628df369b92016b7fc3bfc7fed6d06ff2ca9a.tar.xz
librcc-9d4628df369b92016b7fc3bfc7fed6d06ff2ca9a.zip
- Russian autoengine is renamed to LibRCD
- Fix Learning with Language Autodetection switched on
- Attempt to perform rccFS with Language Autodetection switched off, if it failed with the default behaviour.
- Systematization of translation engine:
    + Rearrangement of the translation modes: OFF, TO_ENGLISH, SKIP_RELATED, SKIP_PARRENT, FULL.
    + New class types: TRANSLATE_LOCALE, TRANSLATE_CURRENT, TRANSLATE_FROM.
- Detect "Unicode" locales for foreign languages
- "out" class is assumed to be TRANSLATE_LOCALE
- Respect RCC_CLASS_KNOWN
- Check for Latin UTF-8 prior to running any charset detection engine.
-rw-r--r-- README 7
-rw-r--r-- ToDo 8
-rw-r--r-- engines/Makefile.am 4
-rw-r--r-- engines/librcd.c (renamed from engines/russian.c) 4
-rw-r--r-- examples/example2.c 4
-rw-r--r-- examples/rcc-example.xml 4
-rw-r--r-- examples/rcc.xml 2
-rw-r--r-- src/engine.c 48
-rw-r--r-- src/engine.h 1
-rw-r--r-- src/internal.h 11
-rw-r--r-- src/librcc.c 47
-rw-r--r-- src/librcc.h 35
-rw-r--r-- src/lngconfig.c 37
-rw-r--r-- src/lngconfig.h 2
-rw-r--r-- src/lngrecode.c 47
-rw-r--r-- src/rccconfig.c 7
-rw-r--r-- src/rccconfig.h 1
-rw-r--r-- src/rcciconv.c 2
-rw-r--r-- src/rcclocale.h 3
-rw-r--r-- src/recode.c 358
-rw-r--r-- ui/rccnames.c 2
21 files changed, 416 insertions, 218 deletions
diff --git a/README b/README
index e69de29..6a47827 100644
--- a/README
+++ b/README
@@ -0,0 +1,7 @@
+
+Language Autodetection
+----------------------
+
+
+Translation
+-----------
diff --git a/ToDo b/ToDo
index 214495f..db0515f 100644
--- a/ToDo
+++ b/ToDo
@@ -10,6 +10,14 @@
- Revise locking subsystem
- Libtranslate can leave translated message partly in old language. This causes problems
because of recoding from UTF8 to Current language. (With UTF-8 encoding should be Okey).
+ - Lating languages. If in the string all characters < 0x7F then we have one of the Latin
+ languages?
+ - Statistic approach of language detection.
+ - LibRCD autolearning using db4
+ + Charset detection
+ + Language detection (same as charsets, but for UTF8...)
+ * Consider word recognition based on probability
+ + Autolearning is triggered by large enough dictionary words
1.x:
diff --git a/engines/Makefile.am b/engines/Makefile.am
index 404cc32..678fc8b 100644
--- a/engines/Makefile.am
+++ b/engines/Makefile.am
@@ -3,8 +3,8 @@ lib_LTLIBRARIES = libwestern.la
libdir = $(pkgdatadir)/engines
if HAVE_RCD
-lib_LTLIBRARIES += librussian.la
-librussian_la_SOURCES = russian.c
+lib_LTLIBRARIES += librcd.la
+librussian_la_SOURCES = librcd.c
librussian_la_LDFLAGS = -module -avoid-version -export-symbols-regex "rccGetInfo"
endif
diff --git a/engines/russian.c b/engines/librcd.c
index 0df145c..c24d244 100644
--- a/engines/russian.c
+++ b/engines/librcd.c
@@ -9,11 +9,11 @@ static rcc_autocharset_id AutoengineRussian(rcc_engine_context ctx, const char *
}
static rcc_engine russian_engine = {
- "Russian", NULL, NULL, &AutoengineRussian, {"CP1251","KOI8-R","UTF-8","IBM866", NULL}
+ "LibRCD", NULL, NULL, &AutoengineRussian, {"CP1251","KOI8-R","UTF-8","IBM866", NULL}
};
static rcc_engine ukrainian_engine = {
- "Russian", NULL, NULL, &AutoengineRussian, {"CP1251","KOI8-U","UTF-8","IBM865", NULL}
+ "LibRCD", NULL, NULL, &AutoengineRussian, {"CP1251","KOI8-U","UTF-8","IBM865", NULL}
};
rcc_engine *rccGetInfo(const char *lang) {
diff --git a/examples/example2.c b/examples/example2.c
index 5ef3efb..2083fcc 100644
--- a/examples/example2.c
+++ b/examples/example2.c
@@ -15,7 +15,7 @@ int main(int argc, char *argv[]) {
rcc_class classes[] = {
{ "input", RCC_CLASS_STANDARD, NULL, NULL, "Input Encoding", 0 },
- { "output", RCC_CLASS_STANDARD, "LC_CTYPE", NULL, "Output Encoding", 0 },
+ { "output", RCC_CLASS_TRANSLATE_LOCALE, "LC_CTYPE", NULL, "Output Encoding", 0 },
{ NULL }
};
@@ -24,7 +24,7 @@ int main(int argc, char *argv[]) {
rccInit();
rccInitDefaultContext(NULL, 0, 0, classes, 0);
rccInitDb4(NULL, "example", 0);
- rccSetOption(NULL, RCC_OPTION_TRANSLATE, RCC_OPTION_TRANSLATE_FULL);
+ rccSetOption(NULL, RCC_OPTION_TRANSLATE, RCC_OPTION_TRANSLATE_SKIP_PARRENT);
current_language_id = rccGetCurrentLanguage(NULL);
english_language_id = rccGetLanguageByName(NULL, "en");
diff --git a/examples/rcc-example.xml b/examples/rcc-example.xml
index 6bad1f3..8dc068c 100644
--- a/examples/rcc-example.xml
+++ b/examples/rcc-example.xml
@@ -11,7 +11,7 @@
<FullName>Russian</FullName>
<FullName locale="ru">Русский</FullName>
<Engines>
- <Engine>russian</Engine>
+ <Engine>librcd</Engine>
</Engines>
<Charsets>
<Charset>UTF-8</Charset>
@@ -25,7 +25,7 @@
<Language name="uk">
<FullName>Ukrainian</FullName>
<Engines>
- <Engine>russian</Engine>
+ <Engine>librcd</Engine>
</Engines>
<Charsets>
<Charset>UTF-8</Charset>
diff --git a/examples/rcc.xml b/examples/rcc.xml
index 50d2ee2..562f38a 100644
--- a/examples/rcc.xml
+++ b/examples/rcc.xml
@@ -13,6 +13,7 @@
<Language name="de">
<FullName>German</FullName>
<Charsets>
+ <Charset>ISO8859-1</Charset>
<Charset>UTF-8</Charset>
</Charsets>
<Engines>
@@ -22,6 +23,7 @@
<Language name="fr">
<FullName>French</FullName>
<Charsets>
+ <Charset>ISO8859-1</Charset>
<Charset>UTF-8</Charset>
</Charsets>
<Engines>
diff --git a/src/engine.c b/src/engine.c
index 8058faf..f9c2284 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -151,3 +151,51 @@ rcc_context rccEngineGetRccContext(rcc_engine_context ctx) {
return ctx->config->ctx;
}
+
+#define bit(i) (1<<i)
+
+static int CheckWestern(const unsigned char *buf, int len) {
+ long i,j;
+ int bytes=0;
+
+ if (!len) len = strlen(buf);
+ for (i=0;i<len;i++) {
+ if (bytes>0) {
+ // Western is 0x100-0x17e
+ if ((buf[i]&0xC0)==0x80) bytes--;
+ else return 0;
+ } else {
+ if (buf[i]<128) continue;
+
+ for (j=6;j>=0;j--)
+ if ((buf[i]&bit(j))==0) break;
+
+ if ((j==0)||(j==6)) return 0;
+
+ bytes=6-j;
+ if (bytes==1) {
+ // Western Languages (C2-C3)
+ if ((buf[i]!=0xC2)&&(buf[i]!=0xC3)) return 0;
+ } else return 0;
+ }
+ }
+ return 1;
+}
+
+
+rcc_autocharset_id rccEngineDetectCharset(rcc_engine_context ctx, const char *buf, size_t len) {
+ rcc_autocharset_id utf;
+
+ if (CheckWestern(buf, len)) {
+ utf=rccConfigGetAutoCharsetByName(ctx->config, "UTF-8");
+ if (utf != (rcc_autocharset_id)-1) return utf;
+ utf=rccConfigGetAutoCharsetByName(ctx->config, "UTF8");
+ if (utf != (rcc_autocharset_id)-1) return utf;
+ utf=rccConfigGetAutoCharsetByName(ctx->config, "UTF_8");
+ return utf;
+ }
+
+ if ((ctx)&&(ctx->func)) return ctx->func(ctx, buf, len);
+ return (rcc_autocharset_id)-1;
+}
+
diff --git a/src/engine.h b/src/engine.h
index 445e962..96e6db6 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -38,5 +38,6 @@ void rccEngineFreeContext(rcc_engine_context engine_ctx);
int rccEngineConfigure(rcc_engine_context ctx);
rcc_charset_id rccAutoengineRussian(rcc_engine_context ctx, const char *buf, int len);
+rcc_autocharset_id rccEngineDetectCharset(rcc_engine_context ctx, const char *buf, size_t len);
#endif /* _RCC_ENGINE_H */
diff --git a/src/internal.h b/src/internal.h
index d5797fc..089311f 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -28,11 +28,20 @@
#include "rcciconv.h"
#include "rccstring.h"
#include "rccmutex.h"
+#include "rcclocale.h"
typedef rcc_language_id rcc_language_parrent_list[RCC_MAX_LANGUAGE_PARRENTS];
+struct rcc_language_internal_t {
+ rcc_language language;
+ rcc_language_id parrents[RCC_MAX_LANGUAGE_PARRENTS + 1];
+ unsigned char latin;
+};
+typedef struct rcc_language_internal_t rcc_language_internal;
+typedef rcc_language_internal *rcc_language_internal_ptr;
+
struct rcc_context_t {
char locale_variable[RCC_MAX_VARIABLE_CHARS+1];
@@ -43,8 +52,8 @@ struct rcc_context_t {
unsigned int max_languages;
unsigned int n_languages;
+ rcc_language_internal *ilang;
rcc_language_ptr *languages;
- rcc_language_parrent_list *language_parrents;
rcc_language_config configs;
unsigned int max_classes;
diff --git a/src/librcc.c b/src/librcc.c
index 208fcb3..c27c47d 100644
--- a/src/librcc.c
+++ b/src/librcc.c
@@ -140,7 +140,7 @@ rcc_context rccCreateContext(const char *locale_variable, unsigned int max_langu
rcc_context ctx;
rcc_language_ptr *languages;
- rcc_language_parrent_list *language_parrents;
+ rcc_language_internal *ilang;
rcc_class_ptr *classes;
rcc_language_config configs;
rcc_iconv *from;
@@ -167,18 +167,18 @@ rcc_context rccCreateContext(const char *locale_variable, unsigned int max_langu
languages = (rcc_language_ptr*)malloc((max_languages+1)*sizeof(rcc_language_ptr));
classes = (rcc_class_ptr*)malloc((max_classes+1)*sizeof(rcc_class_ptr));
from = (rcc_iconv*)malloc((max_classes)*sizeof(rcc_iconv));
- language_parrents = (rcc_language_parrent_list*)malloc((max_languages+1)*sizeof(rcc_language_parrent_list));
+ ilang = (rcc_language_internal*)malloc((max_languages+1)*sizeof(rcc_language_internal));
mutex = rccMutexCreate();
configs = (rcc_language_config)malloc((max_languages)*sizeof(struct rcc_language_config_t));
- if ((!ctx)||(!languages)||(!classes)||(!mutex)||(!language_parrents)) {
+ if ((!ctx)||(!languages)||(!classes)||(!mutex)||(!from)||(!ilang)||(!mutex)) {
if (mutex) rccMutexFree(mutex);
if (from) free(from);
if (configs) free(configs);
if (classes) free(classes);
if (languages) free(languages);
- if (language_parrents) free(language_parrents);
+ if (ilang) free(ilang);
if (ctx) free(ctx);
return NULL;
}
@@ -193,8 +193,7 @@ rcc_context rccCreateContext(const char *locale_variable, unsigned int max_langu
for (i=0;rcc_default_aliases[i].alias;i++)
rccRegisterLanguageAlias(ctx, rcc_default_aliases + i);
- ctx->language_parrents = language_parrents;
- for (i=0;i<max_languages;i++) language_parrents[i][0] = (rcc_language_id)-1;
+ ctx->ilang = ilang;
ctx->languages = languages;
ctx->max_languages = max_languages;
@@ -306,7 +305,7 @@ void rccFreeContext(rcc_context ctx) {
free(ctx->configs);
}
if (ctx->classes) free(ctx->classes);
- if (ctx->language_parrents) free(ctx->language_parrents);
+ if (ctx->ilang) free(ctx->ilang);
if (ctx->languages) free(ctx->languages);
if (ctx->mutex) rccMutexFree(ctx->mutex);
free(ctx);
@@ -365,6 +364,7 @@ int rccUnlockConfiguration(rcc_context ctx, unsigned int lock_code) {
}
rcc_language_id rccRegisterLanguage(rcc_context ctx, rcc_language *language) {
+ unsigned int i;
if (!ctx) {
if (rcc_default_ctx) ctx = rcc_default_ctx;
else return (rcc_language_id)-1;
@@ -373,7 +373,21 @@ rcc_language_id rccRegisterLanguage(rcc_context ctx, rcc_language *language) {
if (ctx->configuration_lock) return (rcc_language_id)-1;
if (ctx->n_languages == ctx->max_languages) return (rcc_language_id)-1;
- ctx->languages[ctx->n_languages++] = language;
+
+ memcpy(ctx->ilang + ctx->n_languages, language, sizeof(rcc_language));
+ ctx->ilang[ctx->n_languages].parrents[0] = (rcc_language_id)-1;
+ ctx->ilang[ctx->n_languages].latin = 0;
+
+ for (i=0;language->charsets[i];i++)
+ if ((strstr(language->charsets[i],"8859"))&&(language->charsets[i][strlen(language->charsets[i])-1]=='1')) {
+ ctx->ilang[ctx->n_languages].latin = 1;
+ break;
+ }
+
+ if ((i==1)&&(!language->charsets[1])&&(rccIsUTF8(language->charsets[0])))
+ ctx->ilang[ctx->n_languages].latin = 1;
+
+ ctx->languages[ctx->n_languages++] = (rcc_language_ptr)(ctx->ilang + ctx->n_languages);
ctx->languages[ctx->n_languages] = NULL;
if (!ctx->current_language)
@@ -388,6 +402,10 @@ rcc_charset_id rccLanguageRegisterCharset(rcc_language *language, rcc_charset ch
if ((!language)||(!charset)) return (rcc_charset_id)-1;
for (i=0;language->charsets[i];i++);
if (i>=RCC_MAX_CHARSETS) return (rcc_charset_id)-1;
+
+ if ((strstr(charset,"8859"))&&(charset[strlen(charset)-1]=='1'))
+ ((rcc_language_internal*)language)->latin = 1;
+
language->charsets[i++] = charset;
language->charsets[i] = NULL;
return i-1;
@@ -443,7 +461,7 @@ rcc_relation_id rccRegisterLanguageRelation(rcc_context ctx, rcc_language_relati
if (language_id == (rcc_language_id)-1) return (rcc_relation_id)-1;
- list = ctx->language_parrents[language_id];
+ list = ((rcc_language_internal*)ctx->languages[language_id])->parrents;
language_id = rccGetLanguageByName(ctx, parrent);
if (language_id == (rcc_language_id)-1) return (rcc_relation_id)0;
@@ -478,6 +496,8 @@ rcc_class_id rccRegisterClass(rcc_context ctx, rcc_class *cl) {
rcc_class_type rccGetClassType(rcc_context ctx, rcc_class_id class_id) {
+ rcc_class_type clt;
+
if (!ctx) {
if (rcc_default_ctx) ctx = rcc_default_ctx;
else return RCC_CLASS_INVALID;
@@ -485,7 +505,14 @@ rcc_class_type rccGetClassType(rcc_context ctx, rcc_class_id class_id) {
if ((class_id<0)||(class_id>=ctx->n_classes)) return RCC_CLASS_INVALID;
- return ctx->classes[class_id]->class_type;
+ /*DS: temporary solution */
+
+ clt = ctx->classes[class_id]->class_type;
+
+ if ((!strcasecmp(ctx->classes[class_id]->name, "out"))&&(clt == RCC_CLASS_STANDARD))
+ clt = RCC_CLASS_TRANSLATE_LOCALE;
+
+ return clt;
}
diff --git a/src/librcc.h b/src/librcc.h
index 63a6f80..0529682 100644
--- a/src/librcc.h
+++ b/src/librcc.h
@@ -13,13 +13,6 @@
#define RCC_MAX_ALIASES 64
#define RCC_MAX_CLASSES 16
-#define RCC_MAX_ERRORS 3
-
-#define RCC_MAX_CHARSET_CHARS 16
-#define RCC_MAX_LANGUAGE_CHARS 16
-#define RCC_MAX_VARIABLE_CHARS 16
-
-
/* ID's */
/**
* Language ID.
@@ -301,7 +294,10 @@ typedef enum rcc_class_type_t {
RCC_CLASS_INVALID = 0, /**< Invalid value */
RCC_CLASS_STANDARD, /**< Standard class */
RCC_CLASS_KNOWN, /**< Class encoding is known and no autodetection should be performed */
- RCC_CLASS_FS /**< Class strings are representing file names */
+ RCC_CLASS_FS, /**< Class strings are representing file names */
+ RCC_CLASS_TRANSLATE_LOCALE, /**< It is permited to translate class strings to current Locale Language in rccTo */
+ RCC_CLASS_TRANSLATE_CURRENT,/**< It is permited to translate class strings to Current Language in rccTo */
+ RCC_CLASS_TRANSLATE_FROM, /**< It is permited to translate class strings to Current Language in rccFrom */
} rcc_class_type;
/**
@@ -390,22 +386,13 @@ typedef int rcc_option_value;
*/
#define RCC_OPTION_LEARNING_FLAG_LEARN 2
-/**
- * Switch translation off.
- */
-#define RCC_OPTION_TRANSLATE_OFF 0
-/**
- * Translate data to english language (Current language don't matter).
- */
-#define RCC_OPTION_TRANSLATE_TO_ENGLISH 1
-/**
- * Skip translation of the english text.
- */
-#define RCC_OPTION_TRANSLATE_SKIP_ENGLISH 2
-/**
- * Translate whole data to the current language.
- */
-#define RCC_OPTION_TRANSLATE_FULL 3
+typedef enum rcc_option_translate_t {
+ RCC_OPTION_TRANSLATE_OFF = 0, /**< Switch translation off. */
+ RCC_OPTION_TRANSLATE_TO_ENGLISH, /**< Translate data to english language (Current language don't matter). */
+ RCC_OPTION_TRANSLATE_SKIP_RELATED, /**< Skip translation of the text's between related languages. */
+ RCC_OPTION_TRANSLATE_SKIP_PARRENT, /**< Skip translation of the text's from parrent languages (from english). */
+ RCC_OPTION_TRANSLATE_FULL /**< Translate whole data to the current language */
+} rcc_option_translate;
/**
* List of options available
diff --git a/src/lngconfig.c b/src/lngconfig.c
index f9d1d6d..7e5a428 100644
--- a/src/lngconfig.c
+++ b/src/lngconfig.c
@@ -353,7 +353,7 @@ rcc_speller rccConfigGetSpeller(rcc_language_config config) {
if (config->speller) language_id = rccConfigGetLanguage(config);
else language_id = (rcc_language_id)-1;
- if (language_id != (rcc_language_id)-1) parrents = config->ctx->language_parrents[language_id];
+ if (language_id != (rcc_language_id)-1) parrents = ((rcc_language_internal*)config->language)->parrents;
else parrents = NULL;
if (parrents) {
@@ -508,10 +508,12 @@ rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_
if (config->default_charset[class_id]) return config->default_charset[class_id];
- charset_id = rccConfigGetLocaleCharset(config, defvalue);
- if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) {
- config->default_charset[class_id] = charset_id;
- return charset_id;
+ if (cl->defvalue) {
+ charset_id = rccConfigGetLocaleCharset(config, defvalue);
+ if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) {
+ config->default_charset[class_id] = charset_id;
+ return charset_id;
+ }
}
if (cl->defvalue) {
@@ -537,7 +539,7 @@ rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_
}
}
- charset_id = rccConfigGetLocaleUnicodeCharset(config, defvalue);
+ charset_id = rccConfigGetLocaleCharset(config, defvalue);
if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) {
config->default_charset[class_id] = charset_id;
return charset_id;
@@ -634,6 +636,7 @@ int rccConfigSetCharsetByName(rcc_language_config config, rcc_class_id class_id,
rcc_charset_id rccConfigGetLocaleCharset(rcc_language_config config, const char *locale_variable) {
const char *lv;
rcc_language_id language_id;
+ char lang[RCC_MAX_CHARSET_CHARS+1];
char stmp[RCC_MAX_CHARSET_CHARS+1];
if ((!config)||(!config->language)) return (rcc_charset_id)-1;
@@ -642,29 +645,17 @@ rcc_charset_id rccConfigGetLocaleCharset(rcc_language_config config, const char
language_id = rccGetLanguageByName(config->ctx, config->language->sn);
if (language_id != (rcc_language_id)-1) {
- if (!rccLocaleGetLanguage(stmp, lv, RCC_MAX_CHARSET_CHARS)) {
- if (!strcmp(config->language->sn, stmp)) {
- if (!rccLocaleGetCharset(stmp, lv, RCC_MAX_CHARSET_CHARS))
- return rccConfigGetCharsetByName(config, stmp);
- }
+ if (!rccLocaleGetCharset(stmp, lv, RCC_MAX_CHARSET_CHARS)) {
+ if (rccIsUnicode(stmp))
+ return rccConfigGetCharsetByName(config, stmp);
+ if ((!rccLocaleGetLanguage(lang, lv, RCC_MAX_CHARSET_CHARS))&&(!strcmp(config->language->sn, lang)))
+ return rccConfigGetCharsetByName(config, stmp);
}
}
return (rcc_charset_id)-1;
}
-rcc_charset_id rccConfigGetLocaleUnicodeCharset(rcc_language_config config, const char *locale_variable) {
- char stmp[RCC_MAX_CHARSET_CHARS+1];
-
- if ((!config)||(!config->language)) return (rcc_charset_id)-1;
-
- if (!rccLocaleGetCharset(stmp, locale_variable?locale_variable:config->ctx->locale_variable, RCC_MAX_CHARSET_CHARS)) {
- if (rccIsUTF8(stmp)) return rccConfigGetCharsetByName(config, stmp);
- }
-
- return (rcc_charset_id)-1;
-}
-
int rccConfigConfigure(rcc_language_config config) {
int err;
rcc_context ctx;
diff --git a/src/lngconfig.h b/src/lngconfig.h
index edfc782..b9e9a6b 100644
--- a/src/lngconfig.h
+++ b/src/lngconfig.h
@@ -47,8 +47,6 @@ void rccConfigClear(rcc_language_config config);
int rccConfigConfigure(rcc_language_config config);
-rcc_charset_id rccConfigGetLocaleUnicodeCharset(rcc_language_config config, const char *locale_variable);
-
const char *rccConfigGetAutoCharsetName(rcc_language_config config, rcc_autocharset_id charset_id);
rcc_autocharset_id rccConfigGetAutoCharsetByName(rcc_language_config config, const char *name);
diff --git a/src/lngrecode.c b/src/lngrecode.c
index aef8e24..4b4f298 100644
--- a/src/lngrecode.c
+++ b/src/lngrecode.c
@@ -7,8 +7,38 @@
#include "internal.h"
#include "fs.h"
+static rcc_autocharset_id rccConfigDetectCharsetInternal(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) {
+ int err;
+ rcc_context ctx;
+ rcc_class_type class_type;
+ rcc_autocharset_id autocharset_id;
+
+ if ((!buf)||(!config)) return (rcc_autocharset_id)-1;
+
+ ctx = config->ctx;
+
+ err = rccConfigConfigure(config);
+ if (err) return (rcc_autocharset_id)-1;
+
+ class_type = rccGetClassType(ctx, class_id);
+ if ((class_type != RCC_CLASS_FS)||((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_TITLES)))) {
+ rccMutexLock(config->mutex);
+ autocharset_id = rccEngineDetectCharset(&config->engine_ctx, buf, len);
+ rccMutexUnLock(config->mutex);
+ return autocharset_id;
+ }
+
+ return (rcc_autocharset_id)-1;
+}
+
+
+rcc_autocharset_id rccConfigDetectCharset(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) {
+ return rccConfigDetectCharsetInternal(config, class_id, buf, len);
+}
+
rcc_string rccConfigSizedFrom(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) {
rcc_context ctx;
+ rcc_class_type class_type;
rcc_string result;
rcc_option_value usedb4;
rcc_autocharset_id charset_id;
@@ -30,7 +60,10 @@ rcc_string rccConfigSizedFrom(rcc_language_config config, rcc_class_id class_id,
}
}
- charset_id = rccConfigDetectCharset(config, class_id, buf, len);
+ class_type = rccGetClassType(ctx, class_id);
+
+ if (class_type == RCC_CLASS_KNOWN) charset_id = (rcc_autocharset_id)-1;
+ else charset_id = rccConfigDetectCharset(config, class_id, buf, len);
if (charset_id != (rcc_autocharset_id)-1)
charset = rccConfigGetAutoCharsetName(config, charset_id);
else
@@ -71,6 +104,7 @@ char *rccConfigSizedTo(rcc_language_config config, rcc_class_id class_id, rcc_co
char *rccConfigSizedRecode(rcc_language_config config, rcc_class_id from, rcc_class_id to, const char *buf, size_t len, size_t *rlen) {
rcc_context ctx;
+ rcc_class_type class_type;
rcc_string result;
rcc_option_value usedb4;
rcc_autocharset_id charset_id;
@@ -97,7 +131,10 @@ char *rccConfigSizedRecode(rcc_language_config config, rcc_class_id from, rcc_cl
}
}
- charset_id = rccConfigDetectCharset(config, from, buf, len);
+ class_type = rccGetClassType(ctx, from);
+
+ if (class_type == RCC_CLASS_KNOWN) charset_id = (rcc_autocharset_id)-1;
+ else charset_id = rccConfigDetectCharset(config, from, buf, len);
if (charset_id != (rcc_autocharset_id)-1)
fromcharset = rccConfigGetAutoCharsetName(config, charset_id);
else
@@ -115,6 +152,7 @@ char *rccConfigSizedRecode(rcc_language_config config, rcc_class_id from, rcc_cl
char *rccConfigSizedRecodeToCharset(rcc_language_config config, rcc_class_id class_id, const char *charset, rcc_const_string buf, size_t len, size_t *rlen) {
rcc_context ctx;
+ rcc_class_type class_type;
rcc_string result;
rcc_option_value usedb4;
rcc_autocharset_id charset_id;
@@ -141,7 +179,10 @@ char *rccConfigSizedRecodeToCharset(rcc_language_config config, rcc_class_id cla
}
}
- charset_id = rccConfigDetectCharset(config, class_id, buf, len);
+ class_type = rccGetClassType(ctx, class_id);
+
+ if (class_type == RCC_CLASS_KNOWN) charset_id = (rcc_autocharset_id)-1;
+ else charset_id = rccConfigDetectCharset(config, class_id, buf, len);
if (charset_id != (rcc_autocharset_id)-1)
ocharset = rccConfigGetAutoCharsetName(config, charset_id);
else
diff --git a/src/rccconfig.c b/src/rccconfig.c
index a54b778..5fecb6b 100644
--- a/src/rccconfig.c
+++ b/src/rccconfig.c
@@ -127,7 +127,7 @@ rcc_language rcc_default_languages_embeded[RCC_MAX_LANGUAGES + 1] = {
rcc_option_value_name rcc_sn_boolean[] = { "OFF", "ON", NULL };
rcc_option_value_name rcc_sn_learning[] = { "OFF", "ON", "RELEARN", "LEARN", NULL };
rcc_option_value_name rcc_sn_clo[] = { "ALL", "CONFIGURED_AND_AUTO", "CONFIGURED_ONLY", NULL };
-rcc_option_value_name rcc_sn_translate[] = { "OFF", "TO_ENGLISH", "SKIP_ENGLISH", "FULL", NULL };
+rcc_option_value_name rcc_sn_translate[] = { "OFF", "TO_ENGLISH", "SKIP_RELATED", "SKIP_PARRENT", "FULL", NULL };
rcc_option_description rcc_option_descriptions[RCC_MAX_OPTIONS+1];
rcc_option_description rcc_option_descriptions_embeded[RCC_MAX_OPTIONS+1] = {
@@ -197,6 +197,11 @@ int rccIsUTF8(const char *name) {
return 1;
}
+int rccIsUnicode(const char *name) {
+ if ((!name)||(strncasecmp(name, "UTF",3)&&strncasecmp(name, "UCS",3))) return 0;
+ return 1;
+}
+
unsigned int rccDefaultDropLanguageRelations(const char *lang) {
unsigned long i, j;
for (i=0,j=0;rcc_default_relations[i].lang;i++) {
diff --git a/src/rccconfig.h b/src/rccconfig.h
index fe7b912..7361910 100644
--- a/src/rccconfig.h
+++ b/src/rccconfig.h
@@ -38,5 +38,6 @@ rcc_language_id rccDefaultGetLanguageByName(const char *name);
unsigned int rccDefaultDropLanguageRelations(const char *lang);
int rccIsUTF8(const char *name);
+int rccIsUnicode(const char *name);
#endif /* _RCC_CONFIG_H */
diff --git a/src/rcciconv.c b/src/rcciconv.c
index 93278a7..b518cd7 100644
--- a/src/rcciconv.c
+++ b/src/rcciconv.c
@@ -7,6 +7,8 @@
#include "internal.h"
#include "rcciconv.h"
+#define RCC_MAX_ERRORS 3
+
static void rccIConvCopySymbol(char **in_buf, int *in_left, char **out_buf, int *out_left) {
if ((out_left>0)&&(in_left>0)) {
/* (**out_buf)=(**in_buf);
diff --git a/src/rcclocale.h b/src/rcclocale.h
index dc2c4e7..b6832ed 100644
--- a/src/rcclocale.h
+++ b/src/rcclocale.h
@@ -1,5 +1,8 @@
#ifndef _RCC_LOCALE_H
#define _RCC_LOCALE_H
+#define RCC_MAX_CHARSET_CHARS 16
+#define RCC_MAX_LANGUAGE_CHARS 16
+#define RCC_MAX_VARIABLE_CHARS 16
#endif /* _RCC_LOCALE_H */
diff --git a/src/recode.c b/src/recode.c
index 27dff92..ee9ac53 100644
--- a/src/recode.c
+++ b/src/recode.c
@@ -21,10 +21,17 @@
#define RCC_ACCEPTABLE_PROBABILITY 0
#define RCC_ACCEPTABLE_LENGTH 3
-static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len, rcc_string *retstring) {
+typedef enum rcc_detect_language_confidence_t {
+ RCC_DETECT_LANGUAGE_CONFIDENCE_UNSURE = 0,
+ RCC_DETECT_LANGUAGE_CONFIDENCE_ALMOST,
+ RCC_DETECT_LANGUAGE_CONFIDENCE_SURE,
+ RCC_DETECT_LANGUAGE_CONFIDENCE_CACHED
+} rcc_detect_language_confidence;
+
+static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len, rcc_string *retstring, rcc_detect_language_confidence *confidence) {
rcc_speller speller = NULL;
- unsigned long i, nlanguages;
- rcc_language_config config, config0 = NULL;
+ long i, nlanguages;
+ rcc_language_config config, config0 = NULL, config1 = NULL;
rcc_string recoded;
unsigned char *utf8;
size_t j, mode;
@@ -48,6 +55,9 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c
unsigned long k;
rcc_language_id *parrents;
size_t chars = 0;
+ char llang[RCC_MAX_LANGUAGE_CHARS];
+ rcc_language_id locale_lang;
+ unsigned char defstep = 0;
unsigned long accepted_nonenglish_langs = 0;
@@ -61,6 +71,7 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c
english_lang = rccStringGetLanguage(recoded);
if (retstring) *retstring = recoded;
else free(recoded);
+ if (confidence) *confidence = RCC_DETECT_LANGUAGE_CONFIDENCE_CACHED;
return english_lang;
}
}
@@ -72,17 +83,33 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c
english_lang = rccGetLanguageByName(ctx, rcc_english_language_sn);
- for (i=0;i<nlanguages;i++) {
- if (i) config = rccGetUsableConfig(ctx, (rcc_language_id)i);
- else config = rccGetCurrentConfig(ctx);
- if (!config) continue;
-
+ for (i=0;i<nlanguages;(defstep>1)?i++:i) {
if (i) {
- if (config==config0) continue;
- } else config0=config;
+ config = rccGetUsableConfig(ctx, (rcc_language_id)i);
+ if ((!config)||(config==config0)||(config==config1)) continue;
+ } else {
+ switch (defstep) {
+ case 0:
+ config = rccGetCurrentConfig(ctx);
+ config0 = config;
+ break;
+ case 1:
+ if (!rccLocaleGetLanguage(llang ,ctx->locale_variable, RCC_MAX_LANGUAGE_CHARS)) {
+ locale_lang = rccGetLanguageByName(ctx, llang);
+ config = rccGetConfig(ctx, locale_lang);
+ } else config = NULL;
+ config1 = config;
+ break;
+ default:
+ config = NULL;
+ }
+ defstep++;
+ if ((!config)||(config0==config1)) continue;
+ }
+
if (bestfixlang != (rcc_language_id)-1) {
- parrents = ctx->language_parrents[i];
+ parrents = ((rcc_language_internal*)config->language)->parrents;
for (k = 0;parrents[k] != (rcc_language_id)-1;k++)
if (parrents[k] == bestfixlang) break;
@@ -192,6 +219,8 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c
if (english_string) free(english_string);
if (retstring) *retstring = best_string;
else if (best_string) free(best_string);
+
+ if (confidence) *confidence = RCC_DETECT_LANGUAGE_CONFIDENCE_SURE;
return bestlang;
}
@@ -199,6 +228,8 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c
if (best_string) free(best_string);
if (retstring) *retstring = english_string;
else if (english_string) free(english_string);
+
+ if (confidence) *confidence = RCC_DETECT_LANGUAGE_CONFIDENCE_SURE;
return english_lang;
}
@@ -206,6 +237,8 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c
if (english_string) free(english_string);
if (retstring) *retstring = best_string;
else if (best_string) free(best_string);
+
+ if (confidence) *confidence = RCC_DETECT_LANGUAGE_CONFIDENCE_ALMOST;
return bestlang;
}
@@ -213,6 +246,8 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c
if (best_string) free(best_string);
if (retstring) *retstring = english_string;
else if (english_string) free(english_string);
+
+ if (confidence) *confidence = RCC_DETECT_LANGUAGE_CONFIDENCE_ALMOST;
return english_lang;
}
@@ -220,89 +255,152 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c
if (english_string) free(english_string);
if (retstring) *retstring = best_string;
else if (best_string) free(best_string);
+
+ if (confidence) *confidence = RCC_DETECT_LANGUAGE_CONFIDENCE_UNSURE;
return bestlang;
} else if (best_string) free(best_string);
if ((english_res > RCC_ACCEPTABLE_PROBABILITY)&&(english_longest > RCC_ACCEPTABLE_LENGTH)) {
if (retstring) *retstring = english_string;
else if (english_string) free(english_string);
+
+ if (confidence) *confidence = RCC_DETECT_LANGUAGE_CONFIDENCE_UNSURE;
return english_lang;
} else if (english_string) free(english_string);
return (rcc_language_id)-1;
}
-
rcc_language_id rccDetectLanguage(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len) {
if (!ctx) {
if (rcc_default_ctx) ctx = rcc_default_ctx;
else return -1;
}
- return rccDetectLanguageInternal(ctx, class_id, buf, len, NULL);
+ return rccDetectLanguageInternal(ctx, class_id, buf, len, NULL, NULL);
}
-static rcc_autocharset_id rccConfigDetectCharsetInternal(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) {
- int err;
- rcc_context ctx;
- rcc_class_type class_type;
- rcc_engine_ptr engine;
- rcc_autocharset_id autocharset_id;
-
- if ((!buf)||(!config)) return (rcc_autocharset_id)-1;
+static int rccIsParrentLanguage(rcc_language_config config, rcc_language_id parrent) {
+ unsigned int i;
+ rcc_language_id language;
+ rcc_language_id *list;
+
+ language = rccConfigGetLanguage(config);
+ if (parrent == language) return 1;
- ctx = config->ctx;
+ list = ((rcc_language_internal*)config->language)->parrents;
+ for (i=0;list[i] != (rcc_language_id)-1;i++)
+ if (list[i] == parrent) return 1;
- err = rccConfigConfigure(config);
- if (err) return (rcc_autocharset_id)-1;
+ return 0;
+}
+
+
+static int rccAreRelatedLanguages(rcc_language_config c1, rcc_language_config c2) {
+ rcc_language_id l1, l2;
+
+ l1 = rccConfigGetLanguage(c1);
+ l2 = rccConfigGetLanguage(c2);
- class_type = rccGetClassType(ctx, class_id);
- if ((class_type != RCC_CLASS_FS)||((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_TITLES)))) {
- rccMutexLock(config->mutex);
- engine = rccConfigGetCurrentEnginePointer(config);
- if ((engine)&&(engine->func)) autocharset_id = engine->func(&config->engine_ctx, buf, len);
- else autocharset_id = (rcc_autocharset_id)-1;
- rccMutexUnLock(config->mutex);
- return autocharset_id;
- }
+ if (rccIsParrentLanguage(c1, l2)) return 1;
+ if (rccIsParrentLanguage(c2, l1)) return 1;
- return (rcc_autocharset_id)-1;
+ return 0;
}
-rcc_autocharset_id rccConfigDetectCharset(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) {
- return rccConfigDetectCharsetInternal(config, class_id, buf, len);
-}
+static char *rccRecodeTranslate(rcc_language_config *config, rcc_class_id class_id, const char *utfstring) {
+ rcc_context ctx;
+ rcc_language_config curconfig;
+
+ rcc_option_value translate;
+ rcc_class_type ctype;
+ rcc_language_id language_id, english_language_id, current_language_id;
-static int rccAreLanguagesRelated(rcc_context ctx, rcc_language_id l1, rcc_language_id l2, rcc_language_id skip) {
- unsigned int i;
- rcc_language_id *list;
+ char llang[RCC_MAX_LANGUAGE_CHARS];
- if ((l1 == skip)||(l2 == skip)) return 0;
+ rcc_translate trans, entrans;
- if (l1 == l2) return 1;
+ char *translated;
+
+ ctx = (*config)->ctx;
+
+ translate = rccGetOption(ctx, RCC_OPTION_TRANSLATE);
+ if (translate == RCC_OPTION_TRANSLATE_OFF) return NULL;
+
+ ctype = rccGetClassType(ctx, class_id);
+ if ((ctype != RCC_CLASS_TRANSLATE_LOCALE)&&(ctype != RCC_CLASS_TRANSLATE_CURRENT)&&(ctype != RCC_CLASS_TRANSLATE_FROM)) return NULL;
+
+ language_id = rccConfigGetLanguage(*config);
+
+ english_language_id = rccGetLanguageByName(ctx, rcc_english_language_sn);
- list = ctx->language_parrents[l1];
- for (i=0;list[i] != (rcc_language_id)-1;i++)
- if (list[i] == l2) return 1;
+ if (translate == RCC_OPTION_TRANSLATE_TO_ENGLISH) {
+ current_language_id = english_language_id ;
+ } else {
+ if (ctype == RCC_CLASS_TRANSLATE_LOCALE) {
+ if (!rccLocaleGetLanguage(llang ,ctx->locale_variable, RCC_MAX_LANGUAGE_CHARS))
+ current_language_id = rccGetLanguageByName(ctx, llang);
+ else
+ current_language_id = (rcc_language_id)-1;
+ } else
+ current_language_id = rccGetCurrentLanguage(ctx);
+ }
+
+ if (current_language_id == (rcc_language_id)-1) return NULL;
+ if (language_id == current_language_id) return NULL;
- list = ctx->language_parrents[l2];
- for (i=0;list[i] != (rcc_language_id)-1;i++)
- if (list[i] == l1) return 1;
+ curconfig = rccGetConfig(ctx, current_language_id);
+ if (!curconfig) return NULL;
- return 0;
+ if (rccConfigConfigure(curconfig)) return NULL;
+
+ if (translate == RCC_OPTION_TRANSLATE_SKIP_RELATED) {
+ if (rccAreRelatedLanguages(curconfig, *config)) return NULL;
+ }
+
+ if (translate == RCC_OPTION_TRANSLATE_SKIP_PARRENT) {
+ if (rccIsParrentLanguage(curconfig, language_id)) return NULL;
+ }
+
+ trans = rccConfigGetTranslator(*config, current_language_id);
+ if (trans) {
+ translated = rccTranslate(trans, utfstring);
+ if (translated) {
+ if ((!((rcc_language_internal*)curconfig->language)->latin)&&(rccIsASCII(translated))) {
+ free(translated);
+ translated = NULL;
+ }
+ }
+ } else translated = NULL;
+
+ if ((!translated)&&(current_language_id != english_language_id)&&(!rccAreRelatedLanguages(*config, curconfig))) {
+ curconfig = rccGetConfig(ctx, english_language_id);
+ if (!curconfig) return NULL;
+ if (rccConfigConfigure(curconfig)) return NULL;
+
+ entrans = rccConfigGetEnglishTranslator(*config);
+ if (entrans) translated = rccTranslate(entrans, utfstring);
+ }
+
+ if (translated) *config = curconfig;
+ return translated;
}
rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf, size_t len) {
int err;
size_t ret;
+ rcc_language_config config;
rcc_language_id language_id, detected_language_id;
rcc_autocharset_id charset_id;
rcc_iconv icnv = NULL;
rcc_string result;
+ rcc_class_type class_type;
rcc_option_value usedb4;
const char *charset;
+ char *translate = NULL;
+ rcc_detect_language_confidence confidence;
if (!ctx) {
if (rcc_default_ctx) ctx = rcc_default_ctx;
@@ -318,29 +416,38 @@ rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf,
if (language_id == (rcc_language_id)-1) return NULL;
if (!strcasecmp(ctx->languages[language_id]->sn, rcc_disabled_language_sn)) return NULL;
-
+ class_type = rccGetClassType(ctx, class_id);
usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE);
-/*
- if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) {
- result = rccDb4GetKey(ctx->db4ctx, buf, len);
- if (result) {
- if (rccStringFixID(result, ctx)) free(result);
- else return result;
- }
- }
-
- if (rccGetOption(ctx, RCC_OPTION_AUTODETECT_LANGUAGE)) {
- detected_language_id = rccDetectLanguageInternal(ctx, class_id, buf, len);
- if (detected_language_id != (rcc_language_id)-1)
- language_id = detected_language_id;
- }
-*/
- detected_language_id = rccDetectLanguageInternal(ctx, class_id, buf, len, &result);
+ detected_language_id = rccDetectLanguageInternal(ctx, class_id, buf, len, &result, &confidence);
if (detected_language_id != (rcc_language_id)-1) {
#ifdef RCC_DEBUG_LANGDETECT
- printf("Language %i(%s): %s\n", rccStringGetLanguage(result), rccStringGetLanguage(result)?rccGetLanguageName(ctx, rccStringGetLanguage(result)):"", result);
+ printf("Language %i(%s): %s\n", rccStringGetLanguage(result), rccStringGetLanguage(result)?rccGetLanguageName(ctx, rccStringGetLanguage(result)):"", result);
#endif /* RCC_DEBUG_LANGDETECT */
+
+ if ((result)&&(rccGetOption(ctx, RCC_OPTION_TRANSLATE))&&(class_type == RCC_CLASS_TRANSLATE_FROM)) {
+ rccMutexLock(ctx->mutex);
+ config = rccGetCurrentConfig(ctx);
+ translate = rccRecodeTranslate(&config, class_id, rccStringGetString(result));
+ rccMutexUnLock(ctx->mutex);
+
+ if (translate) {
+ language_id = rccConfigGetLanguage(config);
+ free(result);
+ result = rccCreateString(language_id, translate, 0);
+ }
+ }
+
+
+ if ((result)&&
+ (usedb4&RCC_OPTION_LEARNING_FLAG_LEARN)&&
+ (confidence!=RCC_DETECT_LANGUAGE_CONFIDENCE_CACHED)&&
+ ((language_id==detected_language_id)||(confidence!=RCC_DETECT_LANGUAGE_CONFIDENCE_UNSURE))&&
+ (!rccStringSetLang(result, ctx->languages[language_id]->sn))) {
+
+ rccDb4SetKey(ctx->db4ctx, buf, len, result);
+ }
+
return result;
}
@@ -349,7 +456,8 @@ rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf,
if (err) return NULL;
rccMutexLock(ctx->mutex);
- charset_id = rccDetectCharset(ctx, class_id, buf, len);
+ if (class_type == RCC_CLASS_KNOWN) charset_id = (rcc_autocharset_id)-1;
+ else charset_id = rccDetectCharset(ctx, class_id, buf, len);
if (charset_id != (rcc_autocharset_id)-1) {
icnv = ctx->iconv_auto[charset_id];
if (rccGetOption(ctx, RCC_OPTION_AUTOENGINE_SET_CURRENT)) {
@@ -362,10 +470,24 @@ rcc_string rccSizedFrom(rcc_context ctx, rcc_class_id class_id, const char *buf,
if (icnv) {
ret = rccIConvInternal(ctx, icnv, buf, len);
if (ret == (size_t)-1) return NULL;
- result = rccCreateString(language_id, ctx->tmpbuffer, ret);
+
+ if ((rccGetOption(ctx, RCC_OPTION_TRANSLATE))&&(rccGetClassType(ctx, class_id) == RCC_CLASS_TRANSLATE_FROM)) {
+ config = rccGetCurrentConfig(ctx);
+ translate = rccRecodeTranslate(&config , class_id, ctx->tmpbuffer);
+ if (translate) language_id = rccConfigGetLanguage(config);
+ }
+
+ result = rccCreateString(language_id, translate?translate:ctx->tmpbuffer, translate?0:ret);
} else {
- result = rccCreateString(language_id, buf, len);
+ if ((rccGetOption(ctx, RCC_OPTION_TRANSLATE))&&(rccGetClassType(ctx, class_id) == RCC_CLASS_TRANSLATE_FROM)) {
+ config = rccGetCurrentConfig(ctx);
+ translate = rccRecodeTranslate(&config , class_id, buf);
+ if (translate) language_id = rccConfigGetLanguage(config);
+ }
+
+ result = rccCreateString(language_id, translate?translate:buf, translate?0:len);
}
+
rccMutexUnLock(ctx->mutex);
if ((result)&&(usedb4&RCC_OPTION_LEARNING_FLAG_LEARN)) {
@@ -385,13 +507,7 @@ char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, s
char *translated = NULL;
rcc_language_config config;
rcc_language_id language_id;
- rcc_language_id current_language_id;
- rcc_language_id english_language_id;
rcc_class_type class_type;
- rcc_option_value translate;
- rcc_translate trans, entrans;
- const char *langname;
- unsigned char english_source;
rcc_iconv icnv;
if (!ctx) {
@@ -414,74 +530,10 @@ char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, s
if (err) return NULL;
class_type = rccGetClassType(ctx, class_id);
- translate = rccGetOption(ctx, RCC_OPTION_TRANSLATE);
- langname = rccGetLanguageName(ctx, language_id);
- if (strcasecmp(langname, rcc_english_language_sn)) english_source = 0;
- else english_source = 1;
-
- if ((class_type != RCC_CLASS_FS)&&((translate==RCC_OPTION_TRANSLATE_FULL)||((translate)&&(!english_source)))) {
- english_language_id = rccGetLanguageByName(ctx, rcc_english_language_sn);
-
+ if (((class_type == RCC_CLASS_TRANSLATE_LOCALE)||(class_type == RCC_CLASS_TRANSLATE_CURRENT))&&(rccGetOption(ctx, RCC_OPTION_TRANSLATE))) {
rccMutexLock(ctx->mutex);
-
- current_language_id = rccGetCurrentLanguage(ctx);
- if (current_language_id != language_id) {
- if (translate != RCC_OPTION_TRANSLATE_TO_ENGLISH) {
- trans = rccConfigGetTranslator(config, current_language_id);
- if (trans) {
- translated = rccTranslate(trans, utfstring);
- if (translated) {
- if ((current_language_id != english_language_id)&&(rccIsASCII(translated))) {
- /* Ffrench to german (no umlauts) => not related
- english to german (no umlauts) => skiping english relations
- DS: Problem if we have relation between french and german */
- if (rccAreLanguagesRelated(ctx, language_id, current_language_id, english_language_id)) {
- free(translated);
- translated = NULL;
- translate = 0;
- }
- }
- }
- if (translated) {
- language_id = current_language_id;
-
- config = rccGetConfig(ctx, language_id);
- if (!config) {
- rccMutexUnLock(ctx->mutex);
- free(translated);
- return NULL;
- }
-
- err = rccConfigConfigure(config);
- if (err) {
- rccMutexUnLock(ctx->mutex);
- free(translated);
- return NULL;
- }
- }
- }
- }
-
- if ((translate == RCC_OPTION_TRANSLATE_TO_ENGLISH)||((translate)&&(!translated)&&(!english_language_id == current_language_id)&&(!rccAreLanguagesRelated(ctx, language_id, current_language_id, (rcc_language_id)-1)))) {
- entrans = rccConfigGetEnglishTranslator(config);
- if (entrans) {
- translated = rccTranslate(config->entrans, utfstring);
-/*
- config = rccGetConfig(ctx, language_id);
- if (!config) {
- rccMutexUnLock(ctx->mutex);
- return translated;
- }
-
- err = rccConfigConfigure(config);
- if (err) {
- rccMutexUnLock(ctx->mutex);
- return translated;
- }*/
- }
- }
- }
+ translated = rccRecodeTranslate(&config, class_id, utfstring);
rccMutexUnLock(ctx->mutex);
}
@@ -492,7 +544,7 @@ char *rccSizedTo(rcc_context ctx, rcc_class_id class_id, rcc_const_string buf, s
return result;
}
}
-
+
rccMutexLock(ctx->mutex);
rccMutexLock(config->mutex);
icnv = config->iconv_to[class_id];
@@ -536,10 +588,14 @@ char *rccSizedRecode(rcc_context ctx, rcc_class_id from, rcc_class_id to, const
if ((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_NAMES))) goto recoding;
if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)) goto recoding;
if (rccGetOption(ctx, RCC_OPTION_AUTODETECT_LANGUAGE)) goto recoding;
- if (rccGetOption(ctx, RCC_OPTION_TRANSLATE)) goto recoding;
+ if ((rccGetOption(ctx, RCC_OPTION_TRANSLATE))&&((class_type == RCC_CLASS_TRANSLATE_LOCALE)||(class_type == RCC_CLASS_TRANSLATE_CURRENT))) goto recoding;
+
+ class_type = rccGetClassType(ctx, from);
+ if ((rccGetOption(ctx, RCC_OPTION_TRANSLATE))&&(class_type == RCC_CLASS_TRANSLATE_FROM)) goto recoding;
rccMutexLock(ctx->mutex);
- from_charset_id = rccDetectCharset(ctx, from, buf, len);
+ if (class_type == RCC_CLASS_KNOWN) from_charset_id = (rcc_autocharset_id)-1;
+ else from_charset_id = rccDetectCharset(ctx, from, buf, len);
if (from_charset_id != (rcc_charset_id)-1) {
from_charset = rccGetAutoCharsetName(ctx, from_charset_id);
to_charset = rccGetCurrentCharsetName(ctx, to);
@@ -606,6 +662,18 @@ char *rccFS(rcc_context ctx, rcc_class_id from, rcc_class_id to, const char *fsp
rccMutexUnLock(config->mutex);
rccMutexUnLock(ctx->mutex);
} else result = NULL;
+
+ if (!result) {
+ config = rccGetCurrentConfig(ctx);
+ if (config) {
+ rccMutexLock(ctx->mutex);
+ rccMutexLock(config->mutex);
+ result = rccFS3(config, to, prefix, rccStringGetString(string));
+ rccMutexUnLock(config->mutex);
+ rccMutexUnLock(ctx->mutex);
+ }
+ }
+
free(string);
} else result = NULL;
diff --git a/ui/rccnames.c b/ui/rccnames.c
index d18f524..8b5b4a0 100644
--- a/ui/rccnames.c
+++ b/ui/rccnames.c
@@ -32,7 +32,7 @@ rcc_name rcc_default_language_names_embeded[RCC_MAX_LANGUAGES+1] = {
rcc_option_value_name rcc_default_option_boolean_names[] = { "Off", "On", NULL };
rcc_option_value_name rcc_default_option_learning_names[] = { "Off", "On", "Relearn", "Learn", NULL };
rcc_option_value_name rcc_default_option_clo_names[] = { "All Languages", "Configured / AutoEngine", "Configured Only", NULL };
-rcc_option_value_name rcc_default_option_translate_names[] = { "Off", "Translate to English", "Skip English Translation", "Full", NULL };
+rcc_option_value_name rcc_default_option_translate_names[] = { "Off", "Translate to English", "Skip Translation between Related Languages", "Skip Translation from Parrent Languages", "Full", NULL };
rcc_option_name rcc_default_option_names[RCC_MAX_OPTIONS+1];
rcc_option_name rcc_default_option_names_embeded[RCC_MAX_OPTIONS+1] = {