[Core] Subclass ModelRunner to support cross-attention & encoder sequences (towards eventual encoder/decoder model support) (#4942)

Co-authored-by: Andrew Feldman <afeld2012@gmail.com>
Co-authored-by: Nick Hill <nickhill@us.ibm.com>
This commit is contained in:
afeldman-nm
2024-08-06 16:51:47 -04:00
committed by GitHub
parent 660470e5a3
commit fd95e026e0
33 changed files with 3957 additions and 333 deletions

View File

@@ -83,7 +83,16 @@ _EMBEDDING_MODELS = {
"MistralModel": ("llama_embedding", "LlamaEmbeddingModel"),
}
# NOTE(review): this first `_MODELS` binding is replaced by the `_MODELS`
# assignment further down; it appears to be the pre-change line of the diff
# hunk shown without +/- markers — confirm before treating it as live code.
_MODELS = {**_GENERATION_MODELS, **_EMBEDDING_MODELS}
# Registry entries for conditional-generation architectures. Both keys map to
# the same ("bart", "BartForConditionalGeneration") pair; presumably the pair
# is (module name, class name), as in the other registries — TODO confirm.
_CONDITIONAL_GENERATION_MODELS = {
"BartModel": ("bart", "BartForConditionalGeneration"),
"BartForConditionalGeneration": ("bart", "BartForConditionalGeneration"),
}
# Combined registry built by unpacking the three per-category dicts in order;
# if a key appears in more than one, the last dict listed wins.
_MODELS = {
**_GENERATION_MODELS,
**_EMBEDDING_MODELS,
**_CONDITIONAL_GENERATION_MODELS
}
# Architecture -> type.
# out of tree models