在使用 llamaindex 构建 rag 引擎时,有以下方式:
1) 标准的 RAG 过程:所有文档构建 1 个增强查询引擎;2) 单独为每个文档构建 1 个查询引擎,然后让 llm 选择查询引擎使用。不同的构建方式是否有差异?
本脚本比较他们之间的差异,评估不同方法的效果
| 方法 | answer_relevancy | context_relevancy | correctness | faithfulness |
| --- | --- | --- | --- | --- |
| 所有文档 1 个引擎 | 0.85 | 0.84375 | 2.95 | 0.25 |
| 每个文档 1 个引擎 | 0.7 | 0.70625 | 3.025 | 0.25 |
| 每个文档 2 个引擎 | 0.825 | 0.786 | 3.6 | 0.1 |
| 每个文档 2 个引擎 2 | 0.725 | 0.77125 | 2.8 | 0.25 |
指标只是有相对参考性,原因如下:1)没有使用私域数据,文档内的知识可能 llm 本身就具备;2) 没有定制 prompt,不同方式的倾向不同,有的方法擅长给出步骤,有的方法擅长总结
# Global LlamaIndex configuration: both the LLM and the embedding model are
# served by the same Ollama instance.
import os

from llama_index.core import Settings
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama

# The endpoint can be overridden with the OLLAMA_BASE_URL environment variable;
# when it is unset the original hard-coded address is used.
base_url = os.environ.get("OLLAMA_BASE_URL", "http://192.168.3.155:11434")

llm = Ollama(model="qwen2.5:latest", request_timeout=360.0, base_url=base_url)
Settings.llm = llm
Settings.embed_model = OllamaEmbedding(
    model_name="quentinz/bge-large-zh-v1.5:latest",
    base_url=base_url,
)
# Section: generate the test data (生成测试数据)
import os
import random

import nest_asyncio
from llama_index.core.llama_dataset import LabeledRagDataset
from llama_index.core.llama_dataset import RagPredictionDataset
from llama_index.core.llama_dataset.generator import RagDatasetGenerator
from llama_index.core.prompts.base import PromptTemplate
from llama_index.core.prompts.prompt_type import PromptType

# Allow nested event loops: generate_dataset_from_nodes() is a synchronous
# call made from inside the async Build_test_dataset().
nest_asyncio.apply()

# Prompt used to generate questions from a chunk; asks for Chinese output.
_QUESTION_GENERATION_PROMPT = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge.\n"
    "generate only questions based on the below query.\n"
    "使用中文生成答案\n"
    "{query_str}\n"
)

# Prompt used to generate the reference answer for each question.
_TEXT_QA_PROMPT = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge,answer the query.\n"
    "使用中文生成答案\n"
    "Query: {query_str}\n"
    "Answer: "
)


def displayify_df(df):
    """For pretty displaying DataFrame in a notebook."""
    display_df = df.style.set_properties(
        **{
            "inline-size": "500px",
            "overflow-wrap": "break-word",
        }
    )
    # `display` is not imported here; it is expected to be the notebook global.
    display(display_df)


def _generate_rag_dataset(nodes, test_size):
    """Sample up to ``test_size`` nodes and generate one labelled question per node."""
    num_questions_per_chunk = 1
    role = "小说作家"
    question_gen_query = (
        f"You are a {role}. "
        f"Your task is to setup {num_questions_per_chunk} questions for an "
        "upcoming quiz/examination. The questions should be diverse in nature "
        "across the document. Restrict the questions to the context "
        "information provided."
    )

    # A private, fixed-seed RNG gives the same sample as random.seed(0) without
    # reseeding the process-wide generator.
    sample_size = min(len(nodes), test_size)
    sample_nodes = random.Random(0).sample(nodes, sample_size)

    print('step1:初始化数据生成器')
    rag_dataset_generator = RagDatasetGenerator(
        nodes=sample_nodes,
        text_question_template=PromptTemplate(_QUESTION_GENERATION_PROMPT),
        text_qa_template=PromptTemplate(
            _TEXT_QA_PROMPT, prompt_type=PromptType.QUESTION_ANSWER
        ),
        question_gen_query=question_gen_query,
        num_questions_per_chunk=num_questions_per_chunk,
    )
    print('step2:为每个node生成问题(包含标准答案)')
    return rag_dataset_generator.generate_dataset_from_nodes()


async def Build_test_dataset(nodes, query_engine, test_size=10, data_dir='./data', prefix=''):
    """Load or build the shared question set, then load or build the predictions.

    Args:
        nodes: Flat list of nodes to sample questions from. Only used when
            ``ragdataset.json`` does not exist yet in ``data_dir``.
        query_engine: Predictor passed to ``amake_predictions_with``.
        test_size: Maximum number of nodes to sample (capped at ``len(nodes)``).
        data_dir: Directory holding the cached JSON files; created if missing.
        prefix: Prefix of the predictions file, so each engine variant keeps
            its own predictions while sharing the same question set.

    Returns:
        Tuple ``(rag_dataset, rag_predictions_dataset)``.
    """
    if data_dir:
        # save_json() below would fail if the directory did not exist.
        os.makedirs(data_dir, exist_ok=True)
    ragdataset_path = os.path.join(data_dir, 'ragdataset.json')
    ragdataset_predictions_path = os.path.join(
        data_dir, f'{prefix}-ragdataset_predictions.json'
    )

    # The question set is shared by every engine variant: reuse it when cached.
    if os.path.exists(ragdataset_path):
        rag_dataset = LabeledRagDataset.from_json(ragdataset_path)
    else:
        rag_dataset = _generate_rag_dataset(nodes, test_size)
        rag_dataset.save_json(ragdataset_path)

    # Predictions are cached per prefix.
    if os.path.exists(ragdataset_predictions_path):
        rag_predictions_dataset = RagPredictionDataset.from_json(
            ragdataset_predictions_path
        )
    else:
        print('step3:使用query_engine回答问题')
        rag_predictions_dataset = await rag_dataset.amake_predictions_with(
            predictor=query_engine, batch_size=10, sleep_time_in_seconds=2
        )
        rag_predictions_dataset.save_json(ragdataset_predictions_path)

    return rag_dataset, rag_predictions_dataset
# Section: define the evaluation function (定义评估函数)
from typing import List

from llama_index.core.evaluation import BatchEvalRunner
from llama_index.core.evaluation import (
    AnswerRelevancyEvaluator,
    ContextRelevancyEvaluator,
    CorrectnessEvaluator,
    FaithfulnessEvaluator,
)

# One shared runner applying all four evaluators to every sample.
runner = BatchEvalRunner(
    evaluators={
        "answer_relevancy": AnswerRelevancyEvaluator(),
        "context_relevancy": ContextRelevancyEvaluator(),
        "correctness": CorrectnessEvaluator(),
        "faithfulness": FaithfulnessEvaluator(),
    },
    workers=12,
    show_progress=True,
)


async def eval_query_engine(queries: List[str], contexts_list: List[List[str]], response_strs: List[str]):
    """Evaluate the responses with ``runner`` and print the mean score per metric.

    Args:
        queries: The questions that were asked.
        contexts_list: For each question, the context strings given to the evaluators.
        response_strs: For each question, the answer to evaluate.

    Returns:
        Dict mapping each evaluator name to its mean score. A metric with no
        results averages to 0.0 instead of raising ZeroDivisionError.
    """
    eval_results = await runner.aevaluate_response_strs(
        queries=queries,
        contexts_list=contexts_list,
        response_strs=response_strs,
    )

    averages = {}
    for key, results in eval_results.items():
        # A missing or None score contributes 0 to the sum but still counts
        # in the denominator.
        total = sum(getattr(result, 'score', 0) or 0 for result in results)
        score = total / len(results) if results else 0.0
        print(f"{key} Score: {score}")
        averages[key] = score
    return averages
所有文档构建 1 个查询引擎1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 from llama_index.core import SimpleDirectoryReaderfrom llama_index.core.node_parser import SentenceSplitterdocuments=SimpleDirectoryReader(input_dir='../../data/sidaminzhu' ,recursive=True ).load_data(show_progress=True ) splitter = SentenceSplitter(chunk_size=1024 ) nodes = splitter.get_nodes_from_documents(documents,show_progress=True ) from llama_index.core import VectorStoreIndexIndex=VectorStoreIndex (nodes=nodes, show_progress=True ) Query_engine=index. As_query_engine () Rag_dataset, rag_predictions_dataset=await Build_test_dataset ( Nodes, query_engine, test_size=20 , prefix='OneEngine' ) Queries=[example. Query for example in rag_dataset. Examples] Contexts_list=[example. Reference_contexts for example in rag_dataset. Examples] Response_strs=[example. Response for example in rag_predictions_dataset. Predictions] Await eval_query_engine (queries, contexts_list, response_strs)
100%|██████████| 80/80 [05:51<00:00, 4.39 s/it] Answer_relevancy Score: 0.85 Context_relevancy Score: 0.84375 Correctness Score: 2.95 Faithfulness Score: 0.25
每个文档构建 1 个查询引擎1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 Import glob From llama_index. Core. Tools import FunctionTool Dir_list=glob.Glob ('../../data/sidaminzhu/*' ) Documents=[ SimpleDirectoryReader (input_dir). Load_data (show_progress=True ) For input_dir in dir_list ] Splitter = SentenceSplitter (chunk_size=1024 ) Documents_nodes=[ Splitter. Get_nodes_from_documents (document) For document in documents ] Documents_indexs=[ VectorStoreIndex (nodes=nodes, show_progress=True ) For nodes in documents_nodes ] Def get_doc_tools ( Vector_index, name: str , ) -> str : Def vector_query (query: str ) -> str : F'''设计用于回答关于{name} 的问题 Query : 输入内容 ''' Query_engine = vector_index. As_query_engine (similarity_top_k=2 ) Response = query_engine.Query (query) Return response Vector_query_tool = FunctionTool. From_defaults ( Name=f"vector_tool_{name} " , fn=vector_query, description=f"关于{name} 问题的回答助手" ) Return vector_query_tool Dir_info=[os.Path.Split (dir )[1 ]. Replace ('白话文' ,'' ) for dir in dir_list] Documents_tools=[ Get_doc_tools (vector_index, dir_info[i]) For i, vector_index in enumerate (documents_indexs) ] From llama_index. Core. Objects import ObjectIndex Tool_index=ObjectIndex. From_objects ( Documents_tools, Index_cls=VectorStoreIndex ) Tool_retriever=tool_index. As_retriever (similarity_top_k=1 ) From llama_index. Core. Agent import FunctionCallingAgent Agent = FunctionCallingAgent. From_tools ( Tool_retriever=tool_retriever, System_prompt="""You are an agent designed to answer queries over a set of given documents. Please use the tools provided to answer a question as possible. Do not rely on prior knowledge\ """ , Verbose=False , ) Rag_dataset, rag_predictions_dataset=await Build_test_dataset ( Documents_nodes, agent, test_size=20 , prefix='MulEngine' ) Queries=[example. Query for example in rag_dataset. 
Examples] Contexts_list=[example. Reference_contexts for example in rag_dataset. Examples] Response_strs=[example. Response for example in rag_predictions_dataset. Predictions] Await eval_query_engine (queries, contexts_list, response_strs)
# Section: two query engines per document (每个文档构建 2 个查询引擎)
from llama_index.core import indices

# List the names exported by llama_index.core.indices that contain 'Index'
# anywhere except at position 0 (str.find returns 0 for a leading match).
indexs = [att for att in dir(indices) if att.find('Index') > 0]
print(indexs)
[‘DocumentSummaryIndex’, ‘EmptyIndex’, ‘GPTDocumentSummaryIndex’, ‘GPTEmptyIndex’, ‘GPTKeywordTableIndex’, ‘GPTListIndex’, ‘GPTPandasIndex’, ‘GPTRAKEKeywordTableIndex’, ‘GPTSQLStructStoreIndex’, ‘GPTSimpleKeywordTableIndex’, ‘GPTTreeIndex’, ‘GPTVectorStoreIndex’, ‘KeywordTableIndex’, ‘KnowledgeGraphIndex’, ‘ListIndex’, ‘MultiModalVectorStoreIndex’, ‘PandasIndex’, ‘PropertyGraphIndex’, ‘RAKEKeywordTableIndex’, ‘SQLStructStoreIndex’, ‘SimpleKeywordTableIndex’, ‘SummaryIndex’, ‘TreeIndex’, ‘VectorStoreIndex’]
由以上输出可知,llamaindex 索引内容的方式有多种,以下选择 KeywordTableIndex, VectorStoreIndex 分别索引 1 个文档,检索时,llm 根据问题选择不同方式检索
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 Import glob From llama_index. Core import KeywordTableIndex, VectorStoreIndex From llama_index. Core. Tools import FunctionTool From llama_index. Core import SimpleDirectoryReader From llama_index. Core. Node_parser import SentenceSplitter Dir_list=glob.Glob ('../../data/sidaminzhu/*' ) Documents=[ SimpleDirectoryReader (input_dir). Load_data (show_progress=True ) For input_dir in dir_list ] Splitter = SentenceSplitter (chunk_size=1024 ) Documents_nodes=[ Splitter. Get_nodes_from_documents (document) For document in documents ] Vector_indexs=[ VectorStoreIndex (nodes=nodes, show_progress=True ) For nodes in documents_nodes ] Keyword_indexs=[ KeywordTableIndex (nodes=nodes, show_progress=True ) For nodes in documents_nodes ] Def get_doc_tools ( Vector_index, summary_indexs, name: str , ) -> str : Def vector_query (query: str ) -> str : F'''通过语义相关查询回答关于{name} 的问题,擅长精确查询答案 Query : 输入内容 ''' Query_engine = vector_index. As_query_engine (similarity_top_k=2 ) Response = query_engine.Query (query) Return response Vector_query_tool = FunctionTool. From_defaults ( Name=f"vector_tool_{name} " , fn=vector_query, description=f"关于{name} 问题的回答助手" ) Def keyword_query (query: str ) -> str : F'''回答关于{name} 的问题,擅长输出归纳性总结 Query : 输入内容 ''' Query_engine = keyword_indexs. As_query_engine ( Response_mode="tree_summarize" , Use_async=True , ) Response = query_engine.Query (query) Return response Vector_query_tool = FunctionTool. From_defaults ( Name=f"vector_tool_{name} " , fn=vector_query, description=f"关于{name} 问题的回答助手" ) Keyword_query_tool = FunctionTool. From_defaults ( Name=f"keyword_tool_{name} " , fn=keyword_query, description=f"关于{name} 问题的回答助手" ) Return vector_query_tool, keyword_query_tool Dir_info=[os.Path.Split (dir )[1 ]. 
Replace ('白话文' ,'' ) for dir in dir_list] Documents_tools=[ Get_doc_tools (vector_index, keyword_index, dir_info[i]) For i, (vector_index, keyword_index) in enumerate (zip (vector_indexs, keyword_indexs)) ] All_tools = [t for documents_tools in documents_tools for t in documents_tools] From llama_index. Core. Objects import ObjectIndex Tool_index=ObjectIndex. From_objects ( All_tools, Index_cls=VectorStoreIndex ) Tool_retriever=tool_index. As_retriever (similarity_top_k=2 ) From llama_index. Core. Agent import FunctionCallingAgent Agent = FunctionCallingAgent. From_tools ( Tool_retriever=tool_retriever, System_prompt="""You are an agent designed to answer queries over a set of given documents. Please use the tools provided to answer a question as possible. Do not rely on prior knowledge\ """ ) Rag_dataset, rag_predictions_dataset=await Build_test_dataset ( Documents_nodes, agent, test_size=20 , prefix='MulEngine 2' ) Queries=[example. Query for example in rag_dataset. Examples] Contexts_list=[example. Reference_contexts for example in rag_dataset. Examples] Response_strs=[example. Response for example in rag_predictions_dataset. Predictions] Await eval_query_engine (queries, contexts_list, response_strs)
100%|██████████| 80/80 [01:16<00:00, 1.04 it/s] Answer_relevancy Score: 0.8 Context_relevancy Score: 0.9362499999999999 Correctness Score: 3.55 Faithfulness Score: 0.1
每个文档构建 2 个查询引擎 2
前面生成 agent 时,所有的 engine 都一起放入 agent,可能存在问题,以下先通过 RouterQueryEngine 汇总一个文档的所有 engine,再放到 agent 中
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 Import glob From llama_index. Core import KeywordTableIndex, VectorStoreIndex From llama_index. Core. Tools import FunctionTool From llama_index. Core import SimpleDirectoryReader From llama_index. Core. Node_parser import SentenceSplitter Dir_list=glob.Glob ('../../data/sidaminzhu/*' ) Documents=[ SimpleDirectoryReader (input_dir). Load_data (show_progress=True ) For input_dir in dir_list ] Splitter = SentenceSplitter (chunk_size=1024 ) Documents_nodes=[ Splitter. Get_nodes_from_documents (document) For document in documents ] Vector_indexs=[ VectorStoreIndex (nodes=nodes, show_progress=True ) For nodes in documents_nodes ] Keyword_indexs=[ KeywordTableIndex (nodes=nodes, show_progress=True ) For nodes in documents_nodes ] From llama_index. Core. Query_engine. Router_query_engine import RouterQueryEngine From llama_index. Core. Selectors import LLMSingleSelector From llama_index. Core. Tools import QueryEngineTool Documents_engines=[] For i, (vector_index, keyword_index) in enumerate (zip (vector_indexs, keyword_indexs)): Vector_query_engine = vector_index. As_query_engine (similarity_top_k=2 ) Vector_tool = QueryEngineTool. From_defaults ( Query_engine=vector_query_engine, Description=( "Useful for retrieving specific context from the documents" ), ) Keyword_query_engine = keyword_index. As_query_engine (response_mode="tree_summarize" , use_async=True ) Summary_tool = QueryEngineTool. From_defaults ( Query_engine=keyword_query_engine, Description=("Useful for summarization questions related to documents" ), ) Documents_engines.Append ( RouterQueryEngine ( Selector=LLMSingleSelector. From_defaults (), Query_engine_tools=[vector_tool, summary_tool], Verbose=True ) ) From llama_index. Core. 
Tools import QueryEngineTool, ToolMetadata Dir_info=[os.Path.Split (dir )[1 ]. Replace ('白话文' ,'' ) for dir in dir_list] Query_engine_tools = [ QueryEngineTool ( Query_engine=engine, Metadata=ToolMetadata (name=f"query_engine_{dir_info[i]} " , description=f"回答关于{dir_info[i]} 的问题" ) ) For i, engine in enumerate (documents_engines)] From llama_index. Core. Agent import FunctionCallingAgent Agent = FunctionCallingAgent. From_tools ( Tools=query_engine_tools, System_prompt="""You are an agent designed to answer queries over a set of given documents. Please use the tools provided to answer a question as possible. Do not rely on prior knowledge\ """ , Verbose=True , ) Rag_dataset, rag_predictions_dataset=await Build_test_dataset ( Documents_nodes, agent, test_size=20 , prefix='MulEngine 3' ) Queries=[example. Query for example in rag_dataset. Examples] Contexts_list=[example. Reference_contexts for example in rag_dataset. Examples] Response_strs=[example. Response for example in rag_predictions_dataset. Predictions] Await eval_query_engine (queries, contexts_list, response_strs)
100%|██████████| 80/80 [01:19<00:00, 1.01 it/s] Answer_relevancy Score: 0.725 Context_relevancy Score: 0.77125 Correctness Score: 2.8 Faithfulness Score: 0.25