Recognize words containing spaces (such as ‘New York’) in ABBYY FineReader Engine by adding the space character to the language alphabet, adding the words to a user dictionary, and setting the OneWordPerLine property to TRUE.
Use this file to discover all available pages before exploring further.
ABBYY FineReader Engine allows you to add words with spaces to a dictionary. This feature can be very useful for checking words like “New York.” We recommend using a dictionary for words with spaces during field-level recognition, when you recognize fields (small image chunks which contain short text fragments) using some specific information about the kind of data they can contain. The word “New York,” for example, may be useful if you are recognizing addresses. To recognize words with spaces, do the following:
Add the “space” character to the alphabet of the current language.
Add the necessary words with spaces to the dictionary.
Add the UserDictionaryDescription object to the DictionaryDescriptions collection of the BaseLanguage object.
In the RecognizerParams object of all text blocks, assign the previously created TextLanguage object to the TextLanguage property and the TRUE value to the OneWordPerLine property.
Below you can see a sample in which the “space” character has been added to the alphabet of the English language, and the word “New York” has been added to the dictionary.
C++ (COM) code
// Create a LanguageDatabase objectFREngine::ILanguageDatabasePtr pLanguageDatabase = Engine->CreateLanguageDatabase();// Create a new TextLanguage objectFREngine::ITextLanguagePtr pTextLanguage = pLanguageDatabase->CreateTextLanguage();// Copy all attributes from the predefined English languageFREngine::ITextLanguagePtr pEnglishLanguage = Engine->PredefinedLanguages->Find( "English" )->TextLanguage;pTextLanguage->CopyFrom( pEnglishLanguage );pTextLanguage->InternalName = L"SampleTL";// Bind new dictionary to the first (and only) BaseLanguage object within TextLanguageFREngine::IBaseLanguagePtr pBaseLanguage = pTextLanguage->BaseLanguages->Item(0);// Change the internal dictionary name to a user-defined namepBaseLanguage->InternalName = L"SampleBL"; // Add the "space" character_bstr_t alphabet = pBaseLanguage->GetLetterSet( FREngine::BLLS_Alphabet );pBaseLanguage->put_LetterSet( FREngine::BLLS_Alphabet, alphabet + L" " );// Create new dictionary_bstr_t dictionaryFile = L"D:\\sample.amd";FREngine::IDictionaryPtr pDictionary =pLanguageDatabase->CreateNewDictionary( dictionaryFile, FREngine::LI_EnglishUnitedStates );pDictionary->Name = L"Sample";// Add words with space to the dictionarypDictionary->AddWord( "New York", 100 );// Get the collection of dictionary descriptions and remove all itemsFREngine::IDictionaryDescriptionsPtr pDictionaryDescriptions = pBaseLanguage->DictionaryDescriptions;pDictionaryDescriptions->DeleteAll();// Create a user dictionary description and add it to the collectionFREngine::IDictionaryDescriptionPtr dic = pDictionaryDescriptions->AddNew(FREngine::DT_UserDictionary);// Specify the path to the dictionary which contains words with spacesFREngine::IUserDictionaryDescriptionPtr userDic = dic->GetAsUserDictionaryDescription();userDic->FileName = dictionaryFile;FREngine::ILayoutPtr pLayout;...// Specify the properties of the RecognizerParams object of all text blocks// Iterate blocksfor( int i = pLayout->Blocks->Count - 1; i >= 0; i-- ) { 
FREngine::BlockTypeEnum blockType = pLayout->Blocks->Item( i )->Type; // Find the text block if( blockType != FREngine::BT_Text ) { pLayout->Blocks->DeleteAt(i); } else { pLayout->Blocks->Item(i)->GetAsTextBlock()->RecognizerParams-> TextLanguage = pTextLanguage; pLayout->Blocks->Item(i)->GetAsTextBlock()->RecognizerParams-> OneWordPerLine = VARIANT_TRUE; }}...
C# code
// Sample: build a custom text language based on predefined English whose
// alphabet contains the "space" character and whose only dictionary is a
// user dictionary holding the word "New York", then apply that language
// to every text block of a layout with OneWordPerLine enabled.

// Create a LanguageDatabase object
FREngine.ILanguageDatabase languageDatabase = engineLoader.Engine.CreateLanguageDatabase();

// Create a new TextLanguage object
FREngine.ITextLanguage textLanguage = languageDatabase.CreateTextLanguage();

// Copy all attributes from the predefined English language
FREngine.ITextLanguage englishLanguage = engineLoader.Engine.PredefinedLanguages.Find( "English" ).TextLanguage;
textLanguage.CopyFrom( englishLanguage );
textLanguage.InternalName = "SampleTL";

// Bind new dictionary to the first (and only) BaseLanguage object within TextLanguage
FREngine.IBaseLanguage baseLanguage = textLanguage.BaseLanguages[0];

// Change the internal name of the base language to a user-defined name
baseLanguage.InternalName = "SampleBL";

// Add the "space" character
string alphabet = baseLanguage.get_LetterSet( FREngine.BaseLanguageLetterSetEnum.BLLS_Alphabet );
baseLanguage.set_LetterSet( FREngine.BaseLanguageLetterSetEnum.BLLS_Alphabet, alphabet + " " );

// Create new dictionary
string dictionaryFilePath = "D:\\sample.amd";
FREngine.IDictionary dictionary = languageDatabase.CreateNewDictionary( dictionaryFilePath, FREngine.LanguageIdEnum.LI_EnglishUnitedStates );
dictionary.Name = "Sample";

// Add words with space to the dictionary
dictionary.AddWord( "New York", 100 );

// Get the collection of dictionary descriptions and remove all items
FREngine.IDictionaryDescriptions dictionaryDescriptions = baseLanguage.DictionaryDescriptions;
dictionaryDescriptions.DeleteAll();

// Create a user dictionary description and add it to the collection
FREngine.IDictionaryDescription dic = dictionaryDescriptions.AddNew(FREngine.DictionaryTypeEnum.DT_UserDictionary);

// Specify the path to the dictionary which contains words with spaces
FREngine.IUserDictionaryDescription userDic = dic.GetAsUserDictionaryDescription();
userDic.FileName = dictionaryFilePath;

FREngine.ILayout layout;
...
// Specify the properties of the RecognizerParams object of all text blocks
// Iterate blocks from the last to the first, so that deleting block i
// does not disturb the indices that are still to be visited
for( int i = layout.Blocks.Count - 1; i >= 0; i-- ) {
    // The block type is a BlockTypeEnum value (the same enum that declares BT_Text)
    FREngine.BlockTypeEnum blockType = layout.Blocks[i].Type;
    // Find the text block
    if( blockType != FREngine.BlockTypeEnum.BT_Text ) {
        // Non-text blocks are removed from the layout
        layout.Blocks.DeleteAt(i);
    } else {
        // Text blocks use the custom language and treat each line as one word
        layout.Blocks[i].GetAsTextBlock().RecognizerParams.TextLanguage = textLanguage;
        layout.Blocks[i].GetAsTextBlock().RecognizerParams.OneWordPerLine = true;
    }
}
...