Template schema
A template is a JSON object that specifies the fields to extract, their types, and their structure. When creating a data extraction pipeline, you can either select a predefined extraction template or define a custom one. You can also start with a predefined template and adapt it to your needs.
Here is an example of an extraction template.
{
"schema": "https://smartextract.ai/schemas/template/v0",
"fields": [
{
"name": "general information",
"type": "record",
"description": "Extract general information e.g. receipt date and number.",
"fields": [
{
"name": "receipt number",
"type": "text",
"description": "The receipt number, usually found near the top."
},
{
"name": "date of issue",
"type": "date",
"description": "The date the document was issued."
}
]
},
{
"type": "record",
"name": "payment details",
"description": "Information about how the purchase was paid for.",
"fields": [
{
"name": "net amount",
"type": "quantity",
"description": "The net total price of the products."
}
]
},
{
"type": "record",
"name": "line item",
"description": "Extract information on each line item in the receipt.",
"multiple": true,
"fields": [
{
"name": "description",
"type": "text",
"description": "Textual description of the line item."
},
{
"name": "price",
"type": "quantity",
"description": "The price of the line item."
}
]
}
]
}
If we now define an extraction pipeline using this template, it will output data
following the structure of the example provided in the
extraction schema documentation. Refer to that document for an
explanation of the different field types and the `multiple`
property.
Complete template schema in JSON schema format
{
"$defs": {
"Choice": {
"description": "Information defining one particular choice of a choice field.",
"properties": {
"name": {
"description": "Name of the choice, to be used as the extracted value.",
"title": "Name",
"type": "string"
},
"description": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "A description of the choice item, used to instruct the LLM.",
"title": "Description"
}
},
"required": [
"name"
],
"title": "Choice",
"type": "object"
},
"ChoiceField": {
"description": "Information defining a field of type `choice`.",
"properties": {
"type": {
"const": "choice",
"title": "Type",
"type": "string"
},
"name": {
"description": "The name of the field.",
"title": "Name",
"type": "string"
},
"description": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "A description of the field, used to instruct the LLM.",
"title": "Description"
},
"multiple": {
"default": false,
"description": "Whether this field can appear multiple times.",
"title": "Multiple",
"type": "boolean"
},
"choices": {
"items": {
"$ref": "#/$defs/Choice"
},
"title": "Choices",
"type": "array"
}
},
"required": [
"type",
"name",
"choices"
],
"title": "ChoiceField",
"type": "object"
},
"DateField": {
"description": "Information defining a date field.",
"properties": {
"type": {
"const": "date",
"title": "Type",
"type": "string"
},
"name": {
"description": "The name of the field.",
"title": "Name",
"type": "string"
},
"description": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "A description of the field, used to instruct the LLM.",
"title": "Description"
},
"multiple": {
"default": false,
"description": "Whether this field can appear multiple times.",
"title": "Multiple",
"type": "boolean"
}
},
"required": [
"type",
"name"
],
"title": "DateField",
"type": "object"
},
"QuantityField": {
"description": "Information defining a quantity field.",
"properties": {
"type": {
"const": "quantity",
"title": "Type",
"type": "string"
},
"name": {
"description": "The name of the field.",
"title": "Name",
"type": "string"
},
"description": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "A description of the field, used to instruct the LLM.",
"title": "Description"
},
"multiple": {
"default": false,
"description": "Whether this field can appear multiple times.",
"title": "Multiple",
"type": "boolean"
}
},
"required": [
"type",
"name"
],
"title": "QuantityField",
"type": "object"
},
"RecordField": {
"description": "Information defining a field of type `record`.",
"properties": {
"type": {
"const": "record",
"title": "Type",
"type": "string"
},
"name": {
"description": "The name of the field.",
"title": "Name",
"type": "string"
},
"description": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "A description of the field, used to instruct the LLM.",
"title": "Description"
},
"multiple": {
"default": false,
"description": "Whether this field can appear multiple times.",
"title": "Multiple",
"type": "boolean"
},
"fields": {
"description": "A list of attributes of the record.",
"items": {
"oneOf": [
{
"$ref": "#/$defs/TextField"
},
{
"$ref": "#/$defs/ChoiceField"
},
{
"$ref": "#/$defs/DateField"
},
{
"$ref": "#/$defs/QuantityField"
}
]
},
"title": "Fields",
"type": "array"
}
},
"required": [
"type",
"name",
"fields"
],
"title": "RecordField",
"type": "object"
},
"TextField": {
"description": "Information defining a plain text field.",
"properties": {
"type": {
"const": "text",
"default": "text",
"title": "Type",
"type": "string"
},
"name": {
"description": "The name of the field.",
"title": "Name",
"type": "string"
},
"description": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "A description of the field, used to instruct the LLM.",
"title": "Description"
},
"multiple": {
"default": false,
"description": "Whether this field can appear multiple times.",
"title": "Multiple",
"type": "boolean"
}
},
"required": [
"name"
],
"title": "TextField",
"type": "object"
}
},
"description": "Information defining an extraction template.",
"properties": {
"schema": {
"const": "https://smartextract.ai/schemas/template/v0",
"description": "Reference to the schema describing this data structure.",
"title": "Schema",
"type": "string"
},
"fields": {
"description": "List of simple fields or records of the template.",
"items": {
"oneOf": [
{
"$ref": "#/$defs/TextField"
},
{
"$ref": "#/$defs/ChoiceField"
},
{
"$ref": "#/$defs/DateField"
},
{
"$ref": "#/$defs/QuantityField"
},
{
"$ref": "#/$defs/RecordField"
}
]
},
"title": "Fields",
"type": "array"
}
},
"required": [
"schema",
"fields"
],
"title": "Template",
"type": "object"
}