Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.abbyy.com/llms.txt

Use this file to discover all available pages before exploring further.

Here’s the OCR JSON schema used when you export extracted data such as field values, field structure, and rule check errors.

```json
{
  "$schema": "http://json-schema.org/draft-07/schema",
  "type": "object",
  "description": "Document.",
  "definitions": {
    "borderType": {
      "type": "string",
      "description": "Table cell border type.",
      "enum": [
        "unknown",
        "invisible",
        "visible"
      ]
    },
    "rect": {
      "type": "object",
      "description": "Rectangle {l:left, t:top, r:right, b:bottom}",
      "required": [
        "l",
        "t",
        "r",
        "b"
      ],
      "properties": {
        "l": {
          "type": "integer",
          "description": "Left coordinate."
        },
        "t": {
          "type": "integer",
          "description": "Top coordinate."
        },
        "r": {
          "type": "integer",
          "description": "Right coordinate."
        },
        "b": {
          "type": "integer",
          "description": "Bottom coordinate."
        }
      }
    },
    "confidence": {
      "type": "number",
      "description": "Confidence."
    },
    "listNumberingStyle": {
      "type": "string",
      "description": "List numbering style.",
      "enum": [
        "None",
        "Decimal",
        "UpperRoman",
        "LowerRoman",
        "UpperLetter",
        "LowerLetter",
        "Ordinal",
        "CardinalText",
        "OrdinalText",
        "Hex",
        "Chicago",
        "IdeographDigital",
        "JapaneseCounting",
        "Aiueo",
        "Iroha",
        "DecimalFullWidth",
        "DecimalHalfWidth",
        "JapaneseLegal",
        "JapaneseDigitalTenThousand",
        "DecimalEnclosedCircle",
        "DecimalFullWidth2",
        "AiueoFullWidth",
        "IrohaFullWidth",
        "DecimalZero",
        "Bullet",
        "Ganada",
        "Chosung",
        "DecimalEnclosedFullstop",
        "DecimalEnclosedParen",
        "DecimalEnclosedCircleChinese",
        "IdeographEnclosedCircle",
        "IdeographTraditional",
        "IdeographZodiac",
        "IdeographZodiacTraditional",
        "TaiwaneseCounting",
        "IdeographLegalTraditional",
        "TaiwaneseCountingThousand",
        "TaiwaneseDigital",
        "ChineseCounting",
        "ChineseLegalSimplified",
        "ChineseCountingThousand",
        "ApplicationDefined",
        "KoreanDigital",
        "KoreanCounting",
        "KoreanLegal",
        "KoreanDigital2",
        "Hebrew1",
        "ArabicAlpha",
        "Hebrew2",
        "ArabicAbjad",
        "HindiVowels",
        "HindiConsonants",
        "HindiNumbers",
        "HindiCounting",
        "ThaiLetters",
        "ThaiNumbers",
        "ThaiCounting",
        "VietnameseCounting",
        "NumberInDash",
        "RussianLower",
        "RussianUpper",
        "Burmese",
        "Unnumbered"
      ]
    },
    "listLevel": {
      "type": "object",
      "description": "List level description",
      "required": [
        "levelIndex",
        "numberingStyle",
        "startNumber"
      ],
      "properties": {
        "levelIndex": {
          "type": "integer",
          "description": "Level index.",
          "minimum": 0
        },
        "numberingStyle": {
          "$ref": "#/definitions/listNumberingStyle",
          "description": "List numbering style."
        },
        "startNumber": {
          "type": "integer",
          "description": "Start number."
        }
      }
    },
    "picture": {
      "type": "object",
      "description": "Block with image.",
      "properties": {
        "id": {
          "type": "string",
          "description": "Picture block ID."
        },
        "position": {
          "$ref": "#/definitions/rect"
        },
        "confidence": {
          "$ref": "#/definitions/confidence"
        }
      }
    },
    "barcode": {
      "type": "object",
      "description": "Barcode.",
      "properties": {
        "id": {
          "type": "string",
          "description": "Barcode block ID."
        },
        "position": {
          "$ref": "#/definitions/rect"
        },
        "confidence": {
          "$ref": "#/definitions/confidence"
        },
        "type": {
          "type": "string",
          "description": "Barcode type.",
          "enum": [
            "Code39",
            "Interleaved25",
            "EAN13",
            "Code128",
            "EAN8",
            "PDF417",
            "Codabar",
            "UPCE",
            "Industrial25",
            "IATA25",
            "Matrix25",
            "Code93",
            "PostNet",
            "UCC128",
            "Patch",
            "Aztec",
            "DataMatrix",
            "QRCode",
            "UPCA",
            "MaxiCode",
            "Code32",
            "FullAscii",
            "IntelligentMail",
            "RoyalMail4State",
            "KIX",
            "Australia4State",
            "JapanPost",
            "NotFound"
          ]
        },
        "value": {
          "type": "string",
          "description": "Barcode value."
        },
        "supplementType": {
          "type": "string",
          "description": "Supplement type.",
          "enum": [
            "none",
            "2digits",
            "5digits"
          ]
        },
        "supplementValue": {
          "type": "string",
          "description": "Supplement value."
        }
      }
    },
    "lines": {
      "type": "array",
      "description": "An array of text strings in the text block ordered by reading direction.",
      "items": {
        "type": "object",
        "description": "A line of text.",
        "properties": {
          "position": {
            "$ref": "#/definitions/rect"
          },
          "confidence": {
            "$ref": "#/definitions/confidence"
          },
          "text": {
            "type": "string",
            "description": "A line of text value."
          },
          "charParams": {
            "$ref": "#/definitions/charParams"
          },
          "words": {
            "type": "array",
            "description": "An array of words in a line of text ordered by reading direction.",
            "items": {
              "type": "object",
              "description": "Word.",
              "properties": {
                "position": {
                  "$ref": "#/definitions/rect"
                },
                "confidence": {
                  "$ref": "#/definitions/confidence"
                },
                "text": {
                  "type": "string",
                  "description": "Word value."
                },
                "charParams": {
                  "$ref": "#/definitions/charParams"
                },
                "chars": {
                  "type": "array",
                  "description": "An array of characters in a word ordered by reading direction.",
                  "items": {
                    "type": "object",
                    "description": "Word character.",
                    "properties": {
                      "confidence": {
                        "$ref": "#/definitions/confidence"
                      },
                      "text": {
                        "type": "string",
                        "description": "Word character value."
                      },
                      "position": {
                        "$ref": "#/definitions/rect"
                      },
                      "charParams": {
                        "$ref": "#/definitions/charParams"
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    },
    "charParams": {
      "type": "object",
      "description": "Symbol font formatting parameters.",
      "properties": {
        "bold": {
          "type": "boolean",
          "description": "Bold property state - if it is different from the container's one",
          "default": false
        },
        "italic": {
          "type": "boolean",
          "description": "Italic property state - if it is different from the container's one",
          "default": false
        },
        "underlined": {
          "type": "boolean",
          "description": "Underlined property state - if it is different from the container's one",
          "default": false
        },
        "strikeout": {
          "type": "boolean",
          "description": "Strikeout property state - if it is different from the container's one",
          "default": false
        },
        "smallCaps": {
          "type": "boolean",
          "description": "SmallCaps property state - if it is different from the container's one",
          "default": false
        },
        "superscript": {
          "type": "boolean",
          "description": "Superscript property state - if it is different from the container's one",
          "default": false
        },
        "subscript": {
          "type": "boolean",
          "description": "Subscript property state - if it is different from the container's one",
          "default": false
        },
        "scaling": {
          "type": "integer",
          "description": "Scaling property state (in thousandths of 1) - if it is different from the container's one",
          "default": 1000,
          "minimum": 100,
          "maximum": 10000
        },
        "spacing": {
          "type": "integer",
          "description": "Spacing property state in twips (1/20 pt, 1/1440 inch) - if it is different from the container's one",
          "default": 0,
          "minimum": -1000,
          "maximum": 1000
        },
        "fontSize": {
          "type": "integer",
          "description": "Font size property state in twips (1/20 pt, 1/1440 inch) - if it is different from the container's one",
          "default": 200,
          "minimum": 50,
          "maximum": 4000
        },
        "fontName": {
          "type": "string",
          "description": "Font name property state - if it is different from the container's one"
        },
        "color": {
          "type": "string",
          "description": "Symbol font color property state (hexadecimal value in 6-digits format RRGGBB) - if it is different from the container's one"
        },
        "backgroundColor": {
          "type": "string",
          "description": "Symbol background color property state (hexadecimal value in 6-digits format RRGGBB) - if it is different from the container's one"
        },
        "lang": {
          "type": "string",
          "description": "Symbol language property state (consists of language and country parts, please refer to ISO 639 and ISO 3166) - if it is different from the container's one"
        }
      }
    }
  },
  "required": [
    "version",
    "producer"
  ],
  "properties": {
    "version": {
      "type": "string",
      "description": "Document schema version.",
      "default": "Vantage OCR.Skill JSON output v1.0"
    },
    "producer": {
      "type": "string",
      "description": "JSON file author.",
      "default": "ABBYY Vantage OCR.Skill"
    },
    "languages": {
      "type": "array",
      "description": "List of document languages.",
      "items": {
        "type": "string",
        "description": "Document language."
      }
    },
    "layout": {
      "type": "object",
      "description": "Layout (physical structure).",
      "required": [
        "pages"
      ],
      "properties": {
        "corrected": {
          "type": "boolean",
          "description": "Coordinates for the corrected image on the output.",
          "default": true
        },
        "pages": {
          "type": "array",
          "description": "Array of document pages ordered from first to last.",
          "items": [
            {
              "type": "object",
              "description": "Document page.",
              "properties": {
                "width": {
                  "type": "integer",
                  "description": "The width of the page in pixels."
                },
                "height": {
                  "type": "integer",
                  "description": "The height of the page in pixels."
                },
                "rotated": {
                  "type": "string",
                  "description": "Rotation of the page relative to the original image.",
                  "enum": [
                    "none",
                    "clockwise",
                    "counterclockwise",
                    "upside-down"
                  ]
                },
                "texts": {
                  "type": "array",
                  "description": "An array of blocks with text.",
                  "items": [
                    {
                      "type": "object",
                      "description": "Text.",
                      "properties": {
                        "id" : {
                          "type": "string",
                          "description": "Text block ID."
                        },
                        "position": {
                          "$ref": "#/definitions/rect"
                        },
                        "confidence": {
                          "$ref": "#/definitions/confidence"
                        },
                        "lines": {
                          "$ref": "#/definitions/lines"
                        }
                      }
                    }
                  ]
                },
                "tables": {
                  "type": "array",
                  "description": "An array of blocks with a table.",
                  "items": [
                    {
                      "type": "object",
                      "description": "Table.",
                      "properties": {
                        "id" : {
                          "type": "string",
                          "description": "Table block ID."
                        },
                        "position": {
                          "$ref": "#/definitions/rect"
                        },
                        "confidence": {
                          "$ref": "#/definitions/confidence"
                        },
                        "cells": {
                          "type": "array",
                          "description": "Table cells.",
                          "items": [
                            {
                              "type": "object",
                              "description": "Table cell.",
                              "properties": {
                                "id" : {
                                  "type": "string",
                                  "description": "Table cell ID."
                                },
                                "position": {
                                  "$ref": "#/definitions/rect"
                                },
                                "confidence": {
                                  "$ref": "#/definitions/confidence"
                                },
                                "colRowPosition": {
                                  "type": "object",
                                  "description": "Cell coordinate in the Column-Row system {l:leftColumn, t:topRow, r:rightColumn, b:bottomRow}",
                                  "properties":{
                                    "l": {
                                      "type": "integer",
                                      "description": "Left separator index."
                                    },
                                    "t": {
                                      "type": "integer",
                                      "description": "Top separator index."
                                    },
                                    "r": {
                                      "type": "integer",
                                      "description": "Right separator index."
                                    },
                                    "b": {
                                      "type": "integer",
                                      "description": "Bottom separator index."
                                    }
                                  }
                                },
                                "borders": {
                                  "type": "object",
                                  "description": "Table cell border types {l:left, t:top, r:right, b:bottom}",
                                  "properties":{
                                    "l": {
                                      "$ref": "#/definitions/borderType",
                                      "description": "Left border type."
                                    },
                                    "t": {
                                      "$ref": "#/definitions/borderType",
                                      "description": "Top border type."
                                    },
                                    "r": {
                                      "$ref": "#/definitions/borderType",
                                      "description": "Right border type."
                                    },
                                    "b": {
                                      "$ref": "#/definitions/borderType",
                                      "description": "Bottom border type."
                                    }
                                  }
                                },
                                "contentType": {
                                  "type": "string",
                                  "description": "Table cell content type: [text, picture, barcode].",
                                  "enum": [
                                    "text",
                                    "picture",
                                    "barcode"
                                  ]
                                },
                                "picture": {
                                  "$ref": "#/definitions/picture"
                                },
                                "barcode": {
                                  "$ref": "#/definitions/barcode"
                                },
                                "lines": {
                                  "$ref": "#/definitions/lines"
                                }
                              }
                            }
                          ]
                        }
                      }
                    }
                  ]
                },
                "pictures": {
                  "type": "array",
                  "description": "Array of blocks with a picture.",
                  "items": {
                    "$ref": "#/definitions/picture"
                  }
                },
                "barcodes": {
                  "type": "array",
                  "description": "Array of blocks with barcode.",
                  "items": {
                    "$ref": "#/definitions/barcode"
                  }
                },
                "separators": {
                  "type": "array",
                  "description": "Array of blocks with separator.",
                  "items": [
                    {
                      "type": "object",
                      "description": "Separator.",
                      "properties": {
                        "position": {
                          "$ref": "#/definitions/rect"
                        },
                        "confidence": {
                          "$ref": "#/definitions/confidence"
                        },
                        "color": {
                          "type": "integer",
                          "description": "Separator color."
                        },
                        "thickness": {
                          "type": "integer",
                          "description": "Separator thickness."
                        },
                        "type": {
                          "type": "string",
                          "description": "Separator type.",
                          "enum": [
                            "unknown",
                            "solid",
                            "dotted"
                          ]
                        },
                        "endPoints": {
                          "type": "object",
                          "description": "Coordinates of the separator ends {startX:startX, startY:startY, endX:endX, endY:endY}",
                          "properties":{
                            "startX": {
                              "type": "integer",
                              "description": "Start point X-coordinate."
                            },
                            "startY": {
                              "type": "integer",
                              "description": "Start point Y-coordinate."
                            },
                            "endX": {
                              "type": "integer",
                              "description": "End point X-coordinate."
                            },
                            "endY": {
                              "type": "integer",
                              "description": "End point Y-coordinate."
                            }
                          }
                        }
                      }
                    }
                  ]
                },
                "checkmarks": {
                  "type": "array",
                  "description": "Array of blocks with checkmark.",
                  "items": [
                    {
                      "type": "object",
                      "description": "Checkmark.",
                      "properties": {
                        "position": {
                          "$ref": "#/definitions/rect"
                        },
                        "confidence": {
                          "$ref": "#/definitions/confidence"
                        },
                        "value": {
                          "type": "string",
                          "description": "Checkmark value.",
                          "enum": [
                            "checked",
                            "unchecked",
                            "corrected",
                            "unknown"
                          ]
                        }
                      }
                    }
                  ]
                }
              }
            }
          ]
        }
      }
    },
    "content": {
      "type": "object",
      "description": "Document content (logical structure).",
      "properties": {
        "paragraphs": {
          "type": "array",
          "description": "An array of document paragraphs ordered by reading direction.",
          "items": [
            {
              "type": "object",
              "description": "Document paragraph.",
              "properties": {
                "id": {
                  "type": "string",
                  "description": "Paragraph ID in the document."
                },
                "role": {
                  "type": "string",
                  "description": "Paragraph role in the document.",
                  "enum": [
                    "other",
                    "text",
                    "heading",
                    "headingNumber",
                    "tableOfContents",
                    "tableText",
                    "runningTitle",
                    "endNote",
                    "footNote",
                    "tableCaption",
                    "tableHeading",
                    "pictureCaption",
                    "artefact"
                  ]
                },
                "formatting": {
                  "type": "object",
                  "description": "Paragraph formatting.",
                  "properties": {
                    "aligning": {
                      "type": "string",
                      "description": "Text aligning in the paragraph.",
                      "enum": [
                        "left",
                        "center",
                        "right",
                        "justified",
                        "justifiedForArabic"
                      ]
                    },
                    "lineSpacing": {
                      "type": "integer",
                      "description": "Paragraph line spacing.",
                      "minimum": 0,
                      "default": 0
                    }
                  }
                },
                "layoutReferences": {
                  "type": "array",
                  "description": "An array of document paragraph parts sources in the page block structure, in the order of the text in the paragraph.",
                  "items": [
                    {
                      "type": "object",
                      "description": "Reference block/cell id, paragraph index and line range in the block/cell.",
                      "required": [
                        "blockId",
                        "blockType",
                        "parIndex",
                        "firstLine",
                        "lastLine"
                      ],
                      "properties": {
                        "blockId": {
                          "type": "string",
                          "description": "Containing text block or table cell ID."
                        },
                        "blockType": {
                          "type": "string",
                          "description": "Paragraph container type.",
                          "enum": [
                            "text",
                            "cell"
                          ]
                        },
                        "sectionIndex": {
                          "type": "integer",
                          "description": "Index of logical section on page (or -1 if there is no section containing the paragraph - like headers, footers etc.).",
                          "minimum": -1
                        },
                        "columnIndex": {
                          "type": "integer",
                          "description": "Index of column in logical section (or -1 if there is no column containing the paragraph - like incuts, footnotes etc).",
                          "minimum": -1
                        },
                        "lineNumbering": {
                          "type": "boolean",
                          "description": "true if the paragraph is contained in line numbering area."
                        },
                        "parIndex": {
                          "type": "integer",
                          "description": "Paragraph index in block or table cell.",
                          "minimum": 0
                        },
                        "firstLine": {
                          "type": "integer",
                          "description": "Index of the first line of the paragraph in block or table cell.",
                          "minimum": 0
                        },
                        "lastLine": {
                          "type": "integer",
                          "description": "Index of the last line of the paragraph in block or table cell.",
                          "minimum": 0
                        }
                      }
                    }
                  ]
                },
                "text": {
                  "type": "string",
                  "description": "Paragraph text."
                },
                "listReference": {
                  "type": "object",
                  "description": "Reference to list containing the paragraph.",
                  "properties": {
                    "id": {
                      "type": "string",
                      "description": "List ID."
                    },
                    "levelIndex": {
                      "type": "integer",
                      "description": "Current list item level index.",
                      "minimum": 0,
                      "default": 0
                    },
                    "ordinalNumber": {
                      "type": "integer",
                      "description": "Current list item ordinal number.",
                      "minimum": -1,
                      "default": 0
                    }
                  }
                }
              }
            }
          ]
        },
        "lists": {
          "type": "array",
          "description": "An array of lists found in the document.",
          "items": [
            {
              "type": "object",
              "description": "List description.",
              "properties": {
                "id": {
                  "type": "string",
                  "description": "List ID in the document."
                },
                "listLevels": {
                  "type": "array",
                  "description": "An array of list level descriptions.",
                  "items": {
                    "$ref": "#/definitions/listLevel"
                  }
                }
              }
            }
          ]
        }
      }
    }
  }
}
```

Public JSON schema

JSON schema for the public Vantage extraction output.

OCR XML schema

XML equivalent of the OCR JSON schema for Output activity.

Output activity

The Process activity that produces output files using these schemas.