API Documentation
Documents
Document Conversion
Personal Earnings Statement
Certificate Of Origin
Dangerous Goods Declaration
International Consignment Note
Get Extracted Text
This endpoint retrieves a document’s text that was created by the `process` operation using the `image-to-text` model.
from abbyy_document_ai import DocumentAi
import os

# Example: retrieve the text extracted from a document by the image-to-text model.
# The API key is read from the DOCUMENTAI_API_KEY_AUTH environment variable;
# an empty string is passed when the variable is unset.
with DocumentAi(
    api_key_auth=os.getenv("DOCUMENTAI_API_KEY_AUTH", ""),
) as document_ai:
    # All statements that use the client are indented inside the `with` block.
    res = document_ai.models.image_to_text.get_extracted_text(document_id="wh23anb5xjf0ntw5taase5qz")

    # Stop early if the response carries no extracted text.
    assert res.extracted_text is not None

    # Handle response
    print(res.extracted_text)
{
"meta": {
"id": "wh23anb5xjf0ntw5taase5qz",
"name": "document.jpg",
"createdAt": "2025-02-07T17:44:06.949Z",
"model": "image-to-text",
"status": "Processed",
"pageCount": 10,
"errors": [
{
"message": "Total is required"
}
]
},
"text": {
"languages": [
"EnglishUnitedStates"
],
"content": {
"paragraphs": [
{
"id": "paragraph_1",
"formatting": {
"aligning": "left",
"lineSpacing": 1
},
"layoutReferences": [
{
"blockId": "A77B148A0BE1369723FA10D9BF74D6A4",
"blockType": "text",
"sectionIndex": 0,
"columnIndex": 0,
"lineNumbering": false,
"parIndex": 0,
"firstLine": 0,
"lastLine": 2
}
],
"listReference": {
"id": "list_1",
"levelIndex": 0,
"ordinalNumber": 0
},
"role": "text",
"text": "Sample paragraph text"
}
],
"lists": [
{
"id": "list_1",
"listLevels": [
{
"levelIndex": 0,
"numberingStyle": "Decimal",
"startNumber": 1
}
]
}
]
},
"layout": {
"corrected": false,
"pages": [
{
"width": 3472,
"height": 4624,
"rotated": "none",
"texts": [
{
"id": "text_1",
"position": {
"l": 100,
"t": 200,
"r": 300,
"b": 400
},
"lines": [
{
"confidence": 95,
"position": {
"l": 110,
"t": 210,
"r": 290,
"b": 390
},
"text": "Sample text line",
"charParams": {
"bold": true,
"fontSize": 12,
"fontName": "Arial",
"color": "#000000",
"lang": "en"
},
"words": [
{
"confidence": 98,
"position": {
"l": 115,
"t": 215,
"r": 285,
"b": 385
},
"chars": [
{
"confidence": 99,
"text": "S",
"position": {
"l": 115,
"t": 215,
"r": 125,
"b": 225
}
}
]
}
]
}
],
"confidence": 95
}
],
"tables": [
{
"id": "table_1",
"confidence": 85,
"position": {
"l": 400,
"t": 500,
"r": 800,
"b": 900
},
"cells": [
{
"id": "ADDF4892A3318B0193621EEDA6F9E308",
"colRowPosition": {
"l": 0,
"t": 0,
"r": 1,
"b": 1
},
"borders": {
"l": "invisible",
"t": "invisible",
"r": "invisible",
"b": "invisible"
},
"contentType": "text",
"position": {
"l": 861,
"t": 1581,
"r": 2428,
"b": 1768
},
"lines": [
{
"confidence": 92,
"position": {
"l": 863,
"t": 1582,
"r": 2425,
"b": 1737
},
"text": "QTY SKU PRICE",
"charParams": {
"fontSize": 240,
"fontName": "Arial",
"lang": "en-US"
},
"words": [
{
"confidence": 97,
"position": {
"l": 863,
"t": 1616,
"r": 984,
"b": 1679
},
"text": "QTY",
"chars": [
{
"confidence": 96,
"text": "Q",
"position": {
"l": 863,
"t": 1619,
"r": 904,
"b": 1676
}
},
{
"confidence": 98,
"text": "T",
"position": {
"l": 906,
"t": 1618,
"r": 945,
"b": 1667
}
},
{
"confidence": 97,
"text": "Y",
"position": {
"l": 943,
"t": 1618,
"r": 984,
"b": 1666
}
}
]
},
{
"confidence": 81,
"position": {
"l": 1143,
"t": 1617,
"r": 1258,
"b": 1671
},
"text": "SKU",
"chars": [
{
"confidence": 87,
"text": "S",
"position": {
"l": 1143,
"t": 1619,
"r": 1186,
"b": 1668
}
},
{
"confidence": 90,
"text": "K",
"position": {
"l": 1182,
"t": 1618,
"r": 1219,
"b": 1669
}
},
{
"confidence": 67,
"text": "U",
"position": {
"l": 1216,
"t": 1620,
"r": 1258,
"b": 1667
}
}
]
},
{
"confidence": 95,
"position": {
"l": 2243,
"t": 1632,
"r": 2425,
"b": 1692
},
"text": "PRICE",
"chars": [
{
"confidence": 98,
"text": "P",
"position": {
"l": 2243,
"t": 1636,
"r": 2274,
"b": 1687
}
},
{
"confidence": 95,
"text": "R",
"position": {
"l": 2277,
"t": 1637,
"r": 2312,
"b": 1685
}
},
{
"confidence": 96,
"text": "I",
"position": {
"l": 2319,
"t": 1638,
"r": 2345,
"b": 1685
}
},
{
"confidence": 98,
"text": "C",
"position": {
"l": 2356,
"t": 1637,
"r": 2389,
"b": 1684
}
},
{
"confidence": 90,
"text": "E",
"position": {
"l": 2395,
"t": 1639,
"r": 2425,
"b": 1685
}
}
]
}
]
}
],
"confidence": 92
}
]
}
],
"checkmarks": [
{
"confidence": 95,
"position": {
"l": 100,
"t": 200,
"r": 300,
"b": 400
},
"value": "checked"
}
],
"barcodes": [
{
"id": "barcode_1",
"confidence": 95,
"position": {
"l": 100,
"t": 200,
"r": 300,
"b": 400
},
"type": "QRCode",
"value": "12345",
"supplementType": "none",
"supplementValue": ""
}
],
"pictures": [
{
"id": "picture_1",
"confidence": 95,
"position": {
"l": 100,
"t": 200,
"r": 300,
"b": 400
}
}
],
"separators": [
{
"confidence": 95,
"position": {
"l": 100,
"t": 200,
"r": 300,
"b": 400
},
"type": "solid",
"color": -1,
"thickness": 2,
"endPoints": {
"startX": 0,
"startY": 351,
"endX": 3472,
"endY": 351
}
}
]
}
]
}
}
}
Authorizations
Our API offers authentication via API keys. You can obtain an API key from https://developer.abbyy.com
Path Parameters
The id of the document
Response
The response from the `getExtractedText` endpoint.
Represents a document that has been submitted for processing.
The id of the document.
The name of the document.
Timestamp at which the document was created.
The status of the document. Valid values are Pending, Processing, Processed, Failed, Canceled.
Pending
, Processing
, Processed
, Failed
, Canceled
The number of pages associated with the document that have been processed. This number will always be at least 1.
The model used to process the document.
{
"id": "wh23anb5xjf0ntw5taase5qz",
"name": "document.jpg",
"createdAt": "2025-02-07T17:44:06.949Z",
"model": "image-to-text",
"status": "Processed",
"pageCount": 10,
"errors": [{ "message": "Total is required" }]
}
List of document languages.
Document language.
Layout (physical structure).
Array of document pages ordered from first to last.
Document page.
The width of the page in pixels.
The height of the page in pixels.
Rotation of the page relative to the original image.
none
, clockwise
, counterclockwise
, upside-down
An array of blocks with text.
Text.
Text block ID.
Rectangle {l:left, t:top, r:right, b:bottom}
An array of text strings in the text block ordered by reading direction.
A line of text.
Confidence.
Array of blocks with barcode.
Barcode.
Barcode block ID.
Confidence.
Rectangle {l:left, t:top, r:right, b:bottom}
Barcode type.
Code39
, Interleaved25
, EAN13
, Code128
, EAN8
, PDF417
, Codabar
, UPCE
, Industrial25
, IATA25
, Matrix25
, Code93
, PostNet
, UCC128
, Patch
, Aztec
, DataMatrix
, QRCode
, UPCA
, MaxiCode
, Code32
, FullAscii
, IntelligentMail
, RoyalMail4State
, KIX
, Australia4State
, JapanPost
, NotFound
Barcode value.
Supplement value.
Supplement type.
none
, 2digits
, 5digits
Array of blocks with separator.
Separator.
Confidence.
Rectangle {l:left, t:top, r:right, b:bottom}
Separator type.
solid
, unknown
, dotted
Separator color.
Separator thickness.
Coordinates for the corrected image on the output.
Document content (logical structure).
An array of document paragraphs ordered by reading direction.
Document paragraph.
Paragraph ID of the document.
Paragraph role in the document.
other
, text
, heading
, headingNumber
, tableOfContents
, tableText
, runningTitle
, endNote
, footNote
, tableCaption
, tableHeading
, pictureCaption
, artefact
An array of references to the page blocks that are the sources of the paragraph's parts, in the order of the text in the paragraph.
Reference block/cell id, paragraph index and line range in the block/cell.
Containing text block or table cell ID.
Paragraph container type.
text
, cell
Index of logical section on page (or -1 if there is no section containing the paragraph - like headers, footers etc.).
x >= -1
Index of column in logical section (or -1 if there is no column containing the paragraph - like incuts, footnotes etc.).
x >= -1
true if the paragraph is contained in a line numbering area.
Paragraph index in block or table cell.
x >= 0
Index of the first line of the paragraph in block or table cell.
x >= 0
Index of the last line of the paragraph in block or table cell.
x >= 0
Paragraph text.
Reference to list containing the paragraph.
List ID.
Current list item level index.
x >= 0
Current list item ordinal number.
x >= -1
An array of lists found in the document.
List description.
List ID in the document.
An array of list level descriptions.
List level description.
Level index.
x >= 0
List numbering style.
None
, Decimal
, UpperRoman
, LowerRoman
, UpperLetter
, LowerLetter
, Ordinal
, CardinalText
, OrdinalText
, Hex
, Chicago
, IdeographDigital
, JapaneseCounting
, Aiueo
, Iroha
, DecimalFullWidth
, DecimalHalfWidth
, JapaneseLegal
, JapaneseDigitalTenThousand
, DecimalEnclosedCircle
, DecimalFullWidth2
, AiueoFullWidth
, IrohaFullWidth
, DecimalZero
, Bullet
, Ganada
, Chosung
, DecimalEnclosedFullstop
, DecimalEnclosedParen
, DecimalEnclosedCircleChinese
, IdeographEnclosedCircle
, IdeographTraditional
, IdeographZodiac
, IdeographZodiacTraditional
, TaiwaneseCounting
, IdeographLegalTraditional
, TaiwaneseCountingThousand
, TaiwaneseDigital
, ChineseCounting
, ChineseLegalSimplified
, ChineseCountingThousand
, ApplicationDefined
, KoreanDigital
, KoreanCounting
, KoreanLegal
, KoreanDigital2
, Hebrew1
, ArabicAlpha
, Hebrew2
, ArabicAbjad
, HindiVowels
, HindiConsonants
, HindiNumbers
, HindiCounting
, ThaiLetters
, ThaiNumbers
, ThaiCounting
, VietnameseCounting
, NumberInDash
, RussianLower
, RussianUpper
, Burmese
, Unnumbered
Start number.
Was this page helpful?
from abbyy_document_ai import DocumentAi
import os

# Example: retrieve the text extracted from a document by the image-to-text model.
# The API key is read from the DOCUMENTAI_API_KEY_AUTH environment variable;
# an empty string is passed when the variable is unset.
with DocumentAi(
    api_key_auth=os.getenv("DOCUMENTAI_API_KEY_AUTH", ""),
) as document_ai:
    # All statements that use the client are indented inside the `with` block.
    res = document_ai.models.image_to_text.get_extracted_text(document_id="wh23anb5xjf0ntw5taase5qz")

    # Stop early if the response carries no extracted text.
    assert res.extracted_text is not None

    # Handle response
    print(res.extracted_text)
{
"meta": {
"id": "wh23anb5xjf0ntw5taase5qz",
"name": "document.jpg",
"createdAt": "2025-02-07T17:44:06.949Z",
"model": "image-to-text",
"status": "Processed",
"pageCount": 10,
"errors": [
{
"message": "Total is required"
}
]
},
"text": {
"languages": [
"EnglishUnitedStates"
],
"content": {
"paragraphs": [
{
"id": "paragraph_1",
"formatting": {
"aligning": "left",
"lineSpacing": 1
},
"layoutReferences": [
{
"blockId": "A77B148A0BE1369723FA10D9BF74D6A4",
"blockType": "text",
"sectionIndex": 0,
"columnIndex": 0,
"lineNumbering": false,
"parIndex": 0,
"firstLine": 0,
"lastLine": 2
}
],
"listReference": {
"id": "list_1",
"levelIndex": 0,
"ordinalNumber": 0
},
"role": "text",
"text": "Sample paragraph text"
}
],
"lists": [
{
"id": "list_1",
"listLevels": [
{
"levelIndex": 0,
"numberingStyle": "Decimal",
"startNumber": 1
}
]
}
]
},
"layout": {
"corrected": false,
"pages": [
{
"width": 3472,
"height": 4624,
"rotated": "none",
"texts": [
{
"id": "text_1",
"position": {
"l": 100,
"t": 200,
"r": 300,
"b": 400
},
"lines": [
{
"confidence": 95,
"position": {
"l": 110,
"t": 210,
"r": 290,
"b": 390
},
"text": "Sample text line",
"charParams": {
"bold": true,
"fontSize": 12,
"fontName": "Arial",
"color": "#000000",
"lang": "en"
},
"words": [
{
"confidence": 98,
"position": {
"l": 115,
"t": 215,
"r": 285,
"b": 385
},
"chars": [
{
"confidence": 99,
"text": "S",
"position": {
"l": 115,
"t": 215,
"r": 125,
"b": 225
}
}
]
}
]
}
],
"confidence": 95
}
],
"tables": [
{
"id": "table_1",
"confidence": 85,
"position": {
"l": 400,
"t": 500,
"r": 800,
"b": 900
},
"cells": [
{
"id": "ADDF4892A3318B0193621EEDA6F9E308",
"colRowPosition": {
"l": 0,
"t": 0,
"r": 1,
"b": 1
},
"borders": {
"l": "invisible",
"t": "invisible",
"r": "invisible",
"b": "invisible"
},
"contentType": "text",
"position": {
"l": 861,
"t": 1581,
"r": 2428,
"b": 1768
},
"lines": [
{
"confidence": 92,
"position": {
"l": 863,
"t": 1582,
"r": 2425,
"b": 1737
},
"text": "QTY SKU PRICE",
"charParams": {
"fontSize": 240,
"fontName": "Arial",
"lang": "en-US"
},
"words": [
{
"confidence": 97,
"position": {
"l": 863,
"t": 1616,
"r": 984,
"b": 1679
},
"text": "QTY",
"chars": [
{
"confidence": 96,
"text": "Q",
"position": {
"l": 863,
"t": 1619,
"r": 904,
"b": 1676
}
},
{
"confidence": 98,
"text": "T",
"position": {
"l": 906,
"t": 1618,
"r": 945,
"b": 1667
}
},
{
"confidence": 97,
"text": "Y",
"position": {
"l": 943,
"t": 1618,
"r": 984,
"b": 1666
}
}
]
},
{
"confidence": 81,
"position": {
"l": 1143,
"t": 1617,
"r": 1258,
"b": 1671
},
"text": "SKU",
"chars": [
{
"confidence": 87,
"text": "S",
"position": {
"l": 1143,
"t": 1619,
"r": 1186,
"b": 1668
}
},
{
"confidence": 90,
"text": "K",
"position": {
"l": 1182,
"t": 1618,
"r": 1219,
"b": 1669
}
},
{
"confidence": 67,
"text": "U",
"position": {
"l": 1216,
"t": 1620,
"r": 1258,
"b": 1667
}
}
]
},
{
"confidence": 95,
"position": {
"l": 2243,
"t": 1632,
"r": 2425,
"b": 1692
},
"text": "PRICE",
"chars": [
{
"confidence": 98,
"text": "P",
"position": {
"l": 2243,
"t": 1636,
"r": 2274,
"b": 1687
}
},
{
"confidence": 95,
"text": "R",
"position": {
"l": 2277,
"t": 1637,
"r": 2312,
"b": 1685
}
},
{
"confidence": 96,
"text": "I",
"position": {
"l": 2319,
"t": 1638,
"r": 2345,
"b": 1685
}
},
{
"confidence": 98,
"text": "C",
"position": {
"l": 2356,
"t": 1637,
"r": 2389,
"b": 1684
}
},
{
"confidence": 90,
"text": "E",
"position": {
"l": 2395,
"t": 1639,
"r": 2425,
"b": 1685
}
}
]
}
]
}
],
"confidence": 92
}
]
}
],
"checkmarks": [
{
"confidence": 95,
"position": {
"l": 100,
"t": 200,
"r": 300,
"b": 400
},
"value": "checked"
}
],
"barcodes": [
{
"id": "barcode_1",
"confidence": 95,
"position": {
"l": 100,
"t": 200,
"r": 300,
"b": 400
},
"type": "QRCode",
"value": "12345",
"supplementType": "none",
"supplementValue": ""
}
],
"pictures": [
{
"id": "picture_1",
"confidence": 95,
"position": {
"l": 100,
"t": 200,
"r": 300,
"b": 400
}
}
],
"separators": [
{
"confidence": 95,
"position": {
"l": 100,
"t": 200,
"r": 300,
"b": 400
},
"type": "solid",
"color": -1,
"thickness": 2,
"endPoints": {
"startX": 0,
"startY": 351,
"endX": 3472,
"endY": 351
}
}
]
}
]
}
}
}