ibm-granite/rag-intrinsics-lib

Model card Files Files and versions

rag-intrinsics-lib/citations/lora/granite-3.3-8b-instruct/io.yaml

cguna's picture

Updated certainty and answerability models (#9)

80632af verified 3 months ago

history blame contribute delete

2.94 kB

	# Model name string, or null to use whatever is provided in the chat completion request
	model: ~
	# JSON schema of the model's output
	response_format: \|
	{
	"$defs": {
	"_MODEL_OUTPUT_ENTRY": {
	"properties": {
	"r": {
	"minimum": 0,
	"title": "R",
	"type": "integer"
	},
	"c": {
	"items": {
	"minimum": 0,
	"type": "integer"
	},
	"title": "C",
	"type": "array"
	}
	},
	"required": [
	"r",
	"c"
	],
	"title": "_MODEL_OUTPUT_ENTRY",
	"type": "object"
	}
	},
	"items": {
	"$ref": "#/$defs/_MODEL_OUTPUT_ENTRY"
	},
	"title": "_MODEL_OUTPUT",
	"type": "array"
	}
	transformations:
	# Explode the list of document sentences in each citation
	- type: explode
	input_path: [] # Zero-length path means match root element
	target_field: "c"
	# Model may repeat itself; drop the resulting duplicates.
	- type: drop_duplicates
	input_path: [] # Zero-length path means match root element
	target_fields: ["r", "c"]
	# Replace sentence number with sentence location and contents.
	# Do this first for sentences from the last turn, then for sentences from documents.
	- type: decode_sentences
	source: "last_message"
	input_path: [~, "r"] # Null in path means wildcard
	# New fields to add for each sentence
	output_names:
	begin: "response_begin"
	end: "response_end"
	text: "response_text"
	- type: decode_sentences
	source: "documents"
	input_path: [~, "c"] # Null in path means wildcard
	# New fields to add for each sentence
	output_names:
	document_id: "citation_doc_id"
	begin: "citation_begin"
	end: "citation_end"
	text: "citation_text"
	# Remove fields that we no longer need
	- type: project
	input_path: []
	retained_fields:
	- "response_begin"
	- "response_end"
	- "response_text"
	- "citation_doc_id"
	- "citation_begin"
	- "citation_end"
	- "citation_text"
	# Merge adjacent document spans
	- type: merge_spans
	input_path: []
	group_fields: ["response_begin", "response_end", "response_text", "citation_doc_id"]
	begin_field: "citation_begin"
	end_field: "citation_end"
	text_field: "citation_text"

	instruction: >
	Split the last assistant response into individual sentences.
	For each sentence in the response, identify the statement IDs from the below
	documents that it references. Ensure that your output includes all response
	sentence IDs, and for each response sentence ID, provide the list of corresponding
	referring document sentence IDs. The output must be a json structure.
	parameters:
	max_completion_tokens: 4096
	sentence_boundaries:
	# Mapping from string location to sentence delimiter prefix
	last_message: "r" # <r0>, <r1>, etc.
	documents: "c"