{
"$schema": "http://json-schema.org/draft-07/schema",
"type": "object",
"description": "Document。",
"definitions": {
"borderType": {
"type": "string",
"description": "表格单元格边框类型。",
"enum": [
"unknown",
"invisible",
"visible"
]
},
"rect": {
"type": "object",
"description": "Rectangle {l:left, t:top, r:right, b:bottom}",
"required": [
"l",
"t",
"r",
"b"
],
"properties": {
"l": {
"type": "integer",
"description": "左侧坐标。"
},
"t": {
"type": "integer",
"description": "顶部坐标。"
},
"r": {
"type": "integer",
"description": "右侧坐标。"
},
"b": {
"type": "integer",
"description": "底部坐标。"
}
}
},
"confidence": {
"type": "number",
"description": "置信度。"
},
"listNumberingStyle": {
"type": "string",
"description": "列表编号样式。",
"enum": [
"None",
"Decimal",
"UpperRoman",
"LowerRoman",
"UpperLetter",
"LowerLetter",
"Ordinal",
"CardinalText",
"OrdinalText",
"Hex",
"Chicago",
"IdeographDigital",
"JapaneseCounting",
"Aiueo",
"Iroha",
"DecimalFullWidth",
"DecimalHalfWidth",
"JapaneseLegal",
"JapaneseDigitalTenThousand",
"DecimalEnclosedCircle",
"DecimalFullWidth2",
"AiueoFullWidth",
"IrohaFullWidth",
"DecimalZero",
"Bullet",
"Ganada",
"Chosung",
"DecimalEnclosedFullstop",
"DecimalEnclosedParen",
"DecimalEnclosedCircleChinese",
"IdeographEnclosedCircle",
"IdeographTraditional",
"IdeographZodiac",
"IdeographZodiacTraditional",
"TaiwaneseCounting",
"IdeographLegalTraditional",
"TaiwaneseCountingThousand",
"TaiwaneseDigital",
"ChineseCounting",
"ChineseLegalSimplified",
"ChineseCountingThousand",
"ApplicationDefined",
"KoreanDigital",
"KoreanCounting",
"KoreanLegal",
"KoreanDigital2",
"Hebrew1",
"ArabicAlpha",
"Hebrew2",
"ArabicAbjad",
"HindiVowels",
"HindiConsonants",
"HindiNumbers",
"HindiCounting",
"ThaiLetters",
"ThaiNumbers",
"ThaiCounting",
"VietnameseCounting",
"NumberInDash",
"RussianLower",
"RussianUpper",
"Burmese",
"Unnumbered"
]
},
"listLevel": {
"type": "object",
"description": "列表级别说明",
"required": [
"levelIndex",
"numberingStyle",
"startNumber"
],
"properties": {
"levelIndex": {
"type": "integer",
"description": "级别索引。",
"minimum": 0
},
"numberingStyle": {
"$ref": "#/definitions/listNumberingStyle",
"description": "列表编号样式。"
},
"startNumber": {
"type": "integer",
"description": "起始编号。"
}
}
},
"picture": {
"type": "object",
"description": "包含图像的块。",
"properties": {
"id": {
"type": "string",
"description": "图片块 ID。"
},
"position": {
"$ref": "#/definitions/rect"
},
"confidence": {
"$ref": "#/definitions/confidence"
}
}
},
"barcode": {
"type": "object",
"description": "条形码。",
"properties": {
"id": {
"type": "string",
"description": "条形码块 ID。"
},
"position": {
"$ref": "#/definitions/rect"
},
"confidence": {
"$ref": "#/definitions/confidence"
},
"type": {
"type": "string",
"description": "条形码类型。",
"enum": [
"Code39",
"Interleaved25",
"EAN13",
"Code128",
"EAN8",
"PDF417",
"Codabar",
"UPCE",
"Industrial25",
"IATA25",
"Matrix25",
"Code93",
"PostNet",
"UCC128",
"Patch",
"Aztec",
"DataMatrix",
"QRCode",
"UPCA",
"MaxiCode",
"Code32",
"FullAscii",
"IntelligentMail",
"RoyalMail4State",
"KIX",
"Australia4State",
"JapanPost",
"NotFound"
]
},
"value": {
"type": "string",
"description": "条形码值。"
},
"supplementType": {
"type": "string",
"description": "补充类型。",
"enum": [
"none",
"2digits",
"5digits"
]
},
"supplementValue": {
"type": "string",
"description": "补充值。"
}
}
},
"lines": {
"type": "array",
"description": "文本块中按阅读顺序排列的文本字符串数组。",
"items": [
{
"type": "object",
"description": "一行文本。",
"properties": {
"position": {
"$ref": "#/definitions/rect"
},
"confidence": {
"$ref": "#/definitions/confidence"
},
"text": {
"type": "string",
"description": "文本行的值。"
},
"charParams": {
"$ref": "#/definitions/charParams"
},
"words": {
"type": "array",
"description": "文本行中按阅读顺序排列的单词数组。",
"items": [
{
"type": "object",
"description": "单词。",
"properties": {
"position": {
"$ref": "#/definitions/rect"
},
"confidence": {
"$ref": "#/definitions/confidence"
},
"text": {
"type": "string",
"description": "单词的值。"
},
"charParams": {
"$ref": "#/definitions/charParams"
},
"chars": {
"type": "array",
"description": "单词中按阅读顺序排列的字符数组。",
"items": [
{
"type": "object",
"description": "单词字符。",
"properties": {
"confidence": {
"$ref": "#/definitions/confidence"
},
"text": {
"type": "string",
"description": "单词字符的值。"
},
"position": {
"$ref": "#/definitions/rect"
},
"charParams": {
"$ref": "#/definitions/charParams"
}
}
}
]
}
}
}
]
}
}
}
]
},
"charParams": {
"type": "object",
"description": "字符字体格式参数。",
"properties": {
"bold": {
"type": "boolean",
"description": "粗体属性状态 - 如果与容器的状态不同",
"default": false
},
"italic": {
"type": "boolean",
"description": "斜体属性状态 - 如果与容器的状态不同",
"default": false
},
"underlined": {
"type": "boolean",
"description": "下划线属性状态 - 如果与容器的状态不同",
"default": false
},
"strikeout": {
"type": "boolean",
"description": "删除线属性状态 - 如果与容器的状态不同",
"default": false
},
"smallCaps": {
"type": "boolean",
"description": "小型大写字母属性状态 - 如果与容器的状态不同",
"default": false
},
"superscript": {
"type": "boolean",
"description": "上标属性状态 - 如果与容器的状态不同",
"default": false
},
"subscript": {
"type": "boolean",
"description": "下标属性状态 - 如果与容器的状态不同",
"default": false
},
"scaling": {
"type": "integer",
"description": "缩放属性状态(以千分之一为单位) - 如果与容器的状态不同",
"default": 1000,
"minimum": 100,
"maximum": 10000
},
"spacing": {
"type": "integer",
"description": "间距属性状态,以缇为单位(1/20 磅,1/1440 英寸)——如果与容器的间距不同",
"default": 0,
"minimum": -1000,
"maximum": 1000
},
"fontSize": {
"type": "integer",
"description": "字体大小属性状态,以缇为单位(1/20 磅,1/1440 英寸)——如果与容器的字体大小不同",
"default": 200,
"minimum": 50,
"maximum": 4000
},
"fontName": {
"type": "string",
"description": "字体名称属性状态——如果与容器的字体名称不同"
},
"color": {
"type": "string",
"description": "符号字体颜色属性状态(6 位十六进制格式 RRGGBB 的值)——如果与容器的颜色不同"
},
"backgroundColor": {
"type": "string",
"description": "符号背景颜色属性状态(6 位十六进制格式 RRGGBB 的值)——如果与容器的背景颜色不同"
},
"lang": {
"type": "string",
"description": "符号语言属性状态(由语言和国家/地区部分组成,请参阅 ISO 639 和 ISO 3166)——如果与容器的语言不同"
}
}
},
"required": [
"version",
"producer"
],
"properties": {
"version": {
"type": "string",
"description": "Document 架构版本。",
"default": "Vantage OCR.Skill JSON 输出 v1.0"
},
"producer": {
"type": "string",
"description": "JSON 文件作者。",
"default": "ABBYY Vantage OCR.Skill"
},
"languages": {
"type": "array",
"description": "文档语言列表。",
"items": [
{
"type": "string",
"description": "文档语言。"
}
]
},
"layout": {
"type": "object",
"description": "布局(物理结构)。",
"required": [
"pages"
],
"properties": {
"corrected": {
"type": "boolean",
"description": "输出中已校正图像的坐标。",
"default": true
},
"pages": {
"type": "array",
"description": "从第一页到最后一页排序的文档页数组。",
"items": [
{
"type": "object",
"description": "文档页。",
"properties": {
"width": {
"type": "integer",
"description": "页面宽度(以像素为单位)。"
},
"height": {
"type": "integer",
"description": "页面高度(以磅为单位)。"
},
"rotated": {
"type": "string",
"description": "页面相对于原始图像的旋转。",
"enum": [
"none",
"clockwise",
"counterclockwise",
"upside-down"
]
},
"texts": {
"type": "array",
"description": "包含文本的块数组。",
"items": [
{
"type": "object",
"description": "文本。",
"properties": {
"id" : {
"type": "string",
"description": "文本块 ID。"
},
"position": {
"$ref": "#/definitions/rect"
},
"confidence": {
"$ref": "#/definitions/confidence"
},
"lines": {
"$ref": "#/definitions/lines"
}
}
}
]
},
"tables": {
"type": "array",
"description": "包含表格的块数组。",
"items": [
{
"type": "object",
"description": "表格。",
"properties": {
"id" : {
"type": "string",
"description": "表格块 ID。"
},
"position": {
"$ref": "#/definitions/rect"
},
"confidence": {
"$ref": "#/definitions/confidence"
},
"cells": {
"type": "array",
"description": "表格单元格。",
"items": [
{
"type": "object",
"description": "表格单元格。",
"properties": {
"id" : {
"type": "string",
"description": "表格单元格 ID。"
},
"position": {
"$ref": "#/definitions/rect"
},
"confidence": {
"$ref": "#/definitions/confidence"
},
"colRowPosition": {
"type": "object",
"description": "单元格在列-行系统中的坐标 {l:左列,t:顶行,r:右列,b:底行}",
"properties":{
"l": {
"type": "integer",
"description": "左分隔符索引。"
},
"t": {
"type": "integer",
"description": "顶部分隔符索引。"
},
"r": {
"type": "integer",
"description": "右分隔符索引。"
},
"b": {
"type": "integer",
"description": "底部分隔符索引。"
}
}
},
"borders": {
"type": "object",
"description": "表格单元格边框类型 {l:左,t:顶,r:右,b:底}",
"properties":{
"l": {
"$ref": "#/definitions/borderType",
"description": "左边框类型。"
},
"t": {
"$ref": "#/definitions/borderType",
"description": "顶部边框类型。"
},
"r": {
"$ref": "#/definitions/borderType",
"description": "右边框类型。"
},
"b": {
"$ref": "#/definitions/borderType",
"description": "底部边框类型。"
}
}
},
"contentType": {
"type": "string",
"description": "表格单元格内容类型:[text, picture, barcode]。",
"enum": [
"text",
"picture",
"barcode"
]
},
"picture": {
"$ref": "#/definitions/picture"
},
"barcode": {
"$ref": "#/definitions/barcode"
},
"lines": {
"$ref": "#/definitions/lines"
}
}
}
]
}
}
}
]
},
"pictures": {
"type": "array",
"description": "包含图片的块数组。",
"items": [
{
"$ref": "#/definitions/picture"
}
]
},
"barcodes": {
"type": "array",
"description": "包含条形码的块数组。",
"items": [
{
"$ref": "#/definitions/barcode"
}
]
},
"separators": {
"type": "array",
"description": "包含分隔符的块数组。",
"items": [
{
"type": "object",
"description": "分隔符。",
"properties": {
"position": {
"$ref": "#/definitions/rect"
},
"confidence": {
"$ref": "#/definitions/confidence"
},
"color": {
"type": "integer",
"description": "分隔符颜色。"
},
"thickness": {
"type": "integer",
"description": "分隔符粗细。"
},
"type": {
"type": "string",
"description": "分隔符类型。",
"enum": [
"unknown",
"solid",
"dotted"
]
},
"endPoints": {
"type": "object",
"description": "分隔符端点的坐标 {startX:起始X,startY:起始Y,endX:结束X,endY:结束Y}",
"properties":{
"startX": {
"type": "integer",
"description": "起始点 X 坐标。"
},
"startY": {
"type": "integer",
"description": "起始点 Y 坐标。"
},
"endX": {
"type": "integer",
"description": "结束点 X 坐标。"
},
"endY": {
"type": "integer",
"description": "结束点 Y 坐标。"
}
}
}
}
}
]
},
"checkmarks": {
"type": "array",
"description": "包含复选标记的块数组。",
"items": [
{
"type": "object",
"description": "复选标记。",
"properties": {
"position": {
"$ref": "#/definitions/rect"
},
"confidence": {
"$ref": "#/definitions/confidence"
},
"value": {
"type": "string",
"description": "复选标记值。",
"enum": [
"checked",
"unchecked",
"corrected",
"unknown"
]
}
}
}
]
}
}
}
]
}
}
},
"content": {
"type": "object",
"description": "Document 内容(逻辑结构)。",
"properties": {
"paragraphs": {
"type": "array",
"description": "按阅读顺序排列的文档段落数组。",
"items": [
{
"type": "object",
"description": "文档段落。",
"properties": {
"id": {
"type": "string",
"description": "段落在文档中的 ID。"
},
"role": {
"type": "string",
"description": "段落在文档中的角色。",
"enum": [
"other",
"text",
"heading",
"headingNumber",
"tableOfContents",
"tableText",
"runningTitle",
"endNote",
"footNote",
"tableCaption",
"tableHeading",
"pictureCaption",
"artefact"
]
},
"formatting": {
"type": "object",
"description": "段落格式。",
"properties": {
"aligning": {
"type": "string",
"description": "段落中的文本对齐方式。",
"enum": [
"left",
"center",
"right",
"justified",
"justifiedForArabic"
]
},
"lineSpacing": {
"type": "integer",
"description": "段落行间距。",
"minimum": 0,
"default": 0
}
}
},
"layoutReferences": {
"type": "array",
"description": "页面块结构中文档段落各部分来源的数组,按段落中文本的顺序排列。",
"items": [
{
"type": "object",
"description": "引用块/单元格 ID、段落索引以及块/单元格中的行范围。",
"required": [
"blockId",
"blockType",
"parIndex",
"firstLine",
"lastLine"
],
"properties": {
"blockId": {
"type": "string",
"description": "包含的文本块或表格单元格 ID。"
},
"blockType": {
"type": "string",
"description": "段落容器类型。",
"enum": [
"text",
"cell"
]
},
"sectionIndex": {
"type": "integer",
"description": "页面上逻辑节的索引(如果没有包含该段落的节则为 -1,例如页眉、页脚等)。",
"minimum": -1
},
"columnIndex": {
"type": "integer",
"description": "逻辑节中列的索引(如果没有包含该段落的列则为 -1,例如插图、脚注等)。",
"minimum": -1
},
"lineNumbering": {
"type": "boolean",
"description": "如果段落包含在行编号区域中则为 true。"
},
"parIndex": {
"type": "integer",
"description": "块或表格单元格中的段落索引。",
"minimum": 0
},
"firstLine": {
"type": "integer",
"description": "块或表格单元格中段落首行的索引。",
"minimum": 0
},
"lastLine": {
"type": "integer",
"description": "块或表格单元格中段落末行的索引。",
"minimum": 0
}
}
}
]
},
"text": {
"type": "string",
"description": "段落文本。"
},
"listReference": {
"type": "object",
"description": "对包含该段落的列表的引用。",
"properties": {
"id": {
"type": "string",
"description": "列表 ID。"
},
"levelIndex": {
"type": "integer",
"description": "当前列表项的级别索引。",
"minimum": 0,
"default": 0
},
"ordinalNumber": {
"type": "integer",
"description": "当前列表项的序号。",
"minimum": -1,
"default": 0
}
}
}
}
}
]
},
"lists": {
"type": "array",
"description": "文档中找到的列表数组。",
"items": [
{
"type": "object",
"description": "列表描述。",
"properties": {
"id": {
"type": "string",
"description": "列表在文档中的 ID。"
},
"listLevels": {
"type": "array",
"description": "列表级别描述数组。",
"items": [
{
"$ref": "#/definitions/listLevel"
}
]
}
}
}
]
}
}
}
}
}
