本地搜索
在 [1] 中
已复制!
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License.
# 版权所有 (c) 2024 Microsoft Corporation。
# 根据 MIT 许可证获得许可。
在 [2] 中
已复制!
import os
import pandas as pd
import tiktoken
from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
read_indexer_covariates,
read_indexer_entities,
read_indexer_relationships,
read_indexer_reports,
read_indexer_text_units,
)
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import (
LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore
import os import pandas as pd import tiktoken from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey from graphrag.query.indexer_adapters import ( read_indexer_covariates, read_indexer_entities, read_indexer_relationships, read_indexer_reports, read_indexer_text_units, ) from graphrag.query.question_gen.local_gen import LocalQuestionGen from graphrag.query.structured_search.local_search.mixed_context import ( LocalSearchMixedContext, ) from graphrag.query.structured_search.local_search.search import LocalSearch from graphrag.vector_stores.lancedb import LanceDBVectorStore
本地搜索示例¶
本地搜索方法通过将来自 AI 提取的知识图谱的相关数据与原始文档的文本块相结合来生成答案。此方法适用于需要理解文档中提到的特定实体的问题(例如,洋甘菊的治疗特性是什么?)。
加载文本单元和图数据表作为本地搜索的上下文¶
- 在此测试中,我们首先将索引输出从 parquet 文件加载到数据帧,然后将这些数据帧转换为与知识模型对齐的数据对象集合。
加载表到数据帧¶
在 [3] 中
已复制!
# Locations of the indexing pipeline's parquet outputs and vector store.
INPUT_DIR = "./inputs/operation dulce"
LANCEDB_URI = f"{INPUT_DIR}/lancedb"  # on-disk LanceDB database created during indexing
# Parquet table names emitted by the GraphRAG indexer.
COMMUNITY_REPORT_TABLE = "community_reports"
ENTITY_TABLE = "entities"
COMMUNITY_TABLE = "communities"
RELATIONSHIP_TABLE = "relationships"
COVARIATE_TABLE = "covariates"
TEXT_UNIT_TABLE = "text_units"
# Community hierarchy depth used when loading entities/reports below —
# presumably higher values select finer-grained communities; confirm against
# read_indexer_entities / read_indexer_reports.
COMMUNITY_LEVEL = 2
INPUT_DIR = "./inputs/operation dulce" LANCEDB_URI = f"{INPUT_DIR}/lancedb" COMMUNITY_REPORT_TABLE = "community_reports" ENTITY_TABLE = "entities" COMMUNITY_TABLE = "communities" RELATIONSHIP_TABLE = "relationships" COVARIATE_TABLE = "covariates" TEXT_UNIT_TABLE = "text_units" COMMUNITY_LEVEL = 2
读取实体¶
在 [4] 中
已复制!
# read nodes table to get community and degree data
entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
community_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet")
# adapt the raw dataframes into knowledge-model Entity objects
# (COMMUNITY_LEVEL selects the community hierarchy depth — see indexer_adapters)
entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL)
# load description embeddings to an in-memory lancedb vectorstore
# to connect to a remote db, specify url and port values.
description_embedding_store = LanceDBVectorStore(
    collection_name="default-entity-description",
)
# open the LanceDB database written by the indexing run
description_embedding_store.connect(db_uri=LANCEDB_URI)
print(f"Entity count: {len(entity_df)}")
entity_df.head()
# 读取节点表以获取社区和度数据 entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet") community_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_TABLE}.parquet") entities = read_indexer_entities(entity_df, community_df, COMMUNITY_LEVEL) # 将描述嵌入加载到内存中的 lancedb 向量存储 # 要连接到远程数据库,请指定 url 和端口值。 description_embedding_store = LanceDBVectorStore( collection_name="default-entity-description", ) description_embedding_store.connect(db_uri=LANCEDB_URI) print(f"实体数量:{len(entity_df)}") entity_df.head()
Entity count: 18
Out[4]
id | human_readable_id | title | type | description | text_unit_ids | frequency | degree | x | y | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 425a7862-0aef-4f69-a4c8-8bd42151c9d4 | 0 | ALEX MERCER | PERSON | Agent Alex Mercer is a determined individual w... | [8e938693af886bfd081acbbe8384c3671446bff84a134... | 4 | 9 | 0 | 0 |
1 | bcdbf1fc-0dc1-460f-bc71-2781729c96ba | 1 | TAYLOR CRUZ | PERSON | Agent Taylor Cruz is a commanding and authorit... | [8e938693af886bfd081acbbe8384c3671446bff84a134... | 4 | 8 | 0 | 0 |
2 | ef02ef24-5762-46ce-93ce-7dea6fc86595 | 2 | JORDAN HAYES | PERSON | Dr. Jordan Hayes is a scientist and a member o... | [8e938693af886bfd081acbbe8384c3671446bff84a134... | 4 | 9 | 0 | 0 |
3 | 8b163d27-e43a-4a2c-a26f-866778d8720e | 3 | SAM RIVERA | PERSON | Sam Rivera is a cybersecurity expert and a tal... | [8e938693af886bfd081acbbe8384c3671446bff84a134... | 4 | 8 | 0 | 0 |
4 | 542aa5bd-ba2d-400a-8488-c52d50bc300d | 4 | PARANORMAL MILITARY SQUAD | ORGANIZATION | The PARANORMAL MILITARY SQUAD is an elite grou... | [8e938693af886bfd081acbbe8384c3671446bff84a134... | 2 | 6 | 0 | 0 |
读取关系¶
在 [5] 中
已复制!
# Load the relationship edges produced by indexing and adapt them into
# knowledge-model Relationship objects for the context builder.
relationship_df = pd.read_parquet(f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet")
relationships = read_indexer_relationships(relationship_df)
print(f"Relationship count: {len(relationship_df)}")
relationship_df.head()
relationship_df = pd.read_parquet(f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet") relationships = read_indexer_relationships(relationship_df) print(f"关系数量:{len(relationship_df)}") relationship_df.head()
Relationship count: 54
Out[5]
id | human_readable_id | source | target | description | weight | combined_degree | text_unit_ids | |
---|---|---|---|---|---|---|---|---|
0 | 2bfad9f4-5abd-48d0-8db3-a9cad9120413 | 0 | ALEX MERCER | TAYLOR CRUZ | Alex Mercer and Taylor Cruz are both agents wo... | 37.0 | 17 | [8e938693af886bfd081acbbe8384c3671446bff84a134... |
1 | 6cbb838f-9e83-4086-a684-15c8ed709e52 | 1 | ALEX MERCER | JORDAN HAYES | Alex Mercer and Jordan Hayes are both agents w... | 42.0 | 18 | [8e938693af886bfd081acbbe8384c3671446bff84a134... |
2 | bfdc25f1-80ca-477b-a304-94465b69e680 | 2 | ALEX MERCER | SAM RIVERA | Alex Mercer and Sam Rivera are both agents and... | 26.0 | 17 | [8e938693af886bfd081acbbe8384c3671446bff84a134... |
3 | 7a7e943d-a4f5-487b-9625-5d0907c4c26d | 3 | ALEX MERCER | PARANORMAL MILITARY SQUAD | Alex Mercer is a member of the Paranormal Mili... | 17.0 | 15 | [8e938693af886bfd081acbbe8384c3671446bff84a134... |
4 | 5e00bcb9-a17e-4c27-8241-6ebb286a7fc6 | 4 | ALEX MERCER | DULCE | Alex Mercer is preparing to lead the team into... | 15.0 | 14 | [8e938693af886bfd081acbbe8384c3671446bff84a134... |
在 [6] 中
已复制!
# NOTE: covariates are turned off by default, because they generally need prompt tuning to be valuable
# Please see the GRAPHRAG_CLAIM_* settings
covariate_df = pd.read_parquet(f"{INPUT_DIR}/{COVARIATE_TABLE}.parquet")
claims = read_indexer_covariates(covariate_df)
print(f"Claim records: {len(claims)}")
# Group the claim records under a "claims" key — the mapping shape that
# LocalSearchMixedContext takes for its covariates argument below.
covariates = {"claims": claims}
# 注意:协变量默认是关闭的,因为它们通常需要提示调优才能有价值 # 请查看 GRAPHRAG_CLAIM_* 设置 covariate_df = pd.read_parquet(f"{INPUT_DIR}/{COVARIATE_TABLE}.parquet") claims = read_indexer_covariates(covariate_df) print(f"Claim records: {len(claims)}") covariates = {"claims": claims}
Claim records: 17
读取社区报告¶
在 [7] 中
已复制!
# Load community reports and adapt them to knowledge-model report objects,
# again filtered by COMMUNITY_LEVEL.
report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL)
print(f"Report records: {len(report_df)}")
report_df.head()
report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet") reports = read_indexer_reports(report_df, community_df, COMMUNITY_LEVEL) print(f"Report records: {len(report_df)}") report_df.head()
Report records: 2
Out[7]
id | human_readable_id | community | level | parent | children | title | summary | full_content | rank | rating_explanation | findings | full_content_json | period | size | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 6c3a555680d647ac8be866a129c7b0ea | 0 | 0 | 0 | -1 | [] | Operation: Dulce and Dulce Base Exploration | The community revolves around 'Operation: Dulc... | # Operation: Dulce and Dulce Base Exploration\... | 8.5 | The impact severity rating is high due to the ... | [{'explanation': 'Operation: Dulce is a signif... | {\n "title": "Operation: Dulce and Dulce Ba... | 2025-03-04 | 7 |
1 | 0127331a1ea34b8ba19de2c2a4cb3bc9 | 1 | 1 | 0 | -1 | [] | Paranormal Military Squad and Operation: Dulce | The community centers around the Paranormal Mi... | # Paranormal Military Squad and Operation: Dul... | 8.5 | The impact severity rating is high due to the ... | [{'explanation': 'Agent Alex Mercer is a key f... | {\n "title": "Paranormal Military Squad and... | 2025-03-04 | 9 |
读取文本单元¶
在 [8] 中
已复制!
# Load the source-document text chunks ("text units") so the search can mix
# raw text into the context window alongside graph data.
text_unit_df = pd.read_parquet(f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet")
text_units = read_indexer_text_units(text_unit_df)
print(f"Text unit records: {len(text_unit_df)}")
text_unit_df.head()
text_unit_df = pd.read_parquet(f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet") text_units = read_indexer_text_units(text_unit_df) print(f"文本单元记录:{len(text_unit_df)}") text_unit_df.head()
Text unit records: 5
Out[8]
id | human_readable_id | text | n_tokens | document_ids | entity_ids | relationship_ids | covariate_ids | |
---|---|---|---|---|---|---|---|---|
0 | 8e938693af886bfd081acbbe8384c3671446bff84a134a... | 1 | # Operation: Dulce\n\n## Chapter 1\n\nThe thru... | 1200 | [6e81f882f89dd5596e1925dd3ae8a4f0a0edcb55b35a8... | [425a7862-0aef-4f69-a4c8-8bd42151c9d4, bcdbf1f... | [2bfad9f4-5abd-48d0-8db3-a9cad9120413, 6cbb838... | [745d28dd-be20-411b-85ff-1c69ca70e7b3, 9cba185... |
1 | fd1f46d32e1df6cd429542aeda3d64ddf3745ccb80f443... | 2 | , the hollow echo of the bay a stark reminder ... | 1200 | [6e81f882f89dd5596e1925dd3ae8a4f0a0edcb55b35a8... | [425a7862-0aef-4f69-a4c8-8bd42151c9d4, bcdbf1f... | [2bfad9f4-5abd-48d0-8db3-a9cad9120413, 6cbb838... | [4f9b461f-5e8f-465d-9586-e2fc81787062, 0f74618... |
2 | 7296d9a1f046854d59079dc183de8a054c27c4843d2979... | 3 | differently than praise from others. This was... | 1200 | [6e81f882f89dd5596e1925dd3ae8a4f0a0edcb55b35a8... | [425a7862-0aef-4f69-a4c8-8bd42151c9d4, bcdbf1f... | [2bfad9f4-5abd-48d0-8db3-a9cad9120413, 6cbb838... | [3ef1be9c-4080-4fac-99bd-c4a636248904, 8730b20... |
3 | ac72722a02ac71242a2a91fca323198d04197daf60515d... | 4 | contrast to the rigid silence enveloping the ... | 1200 | [6e81f882f89dd5596e1925dd3ae8a4f0a0edcb55b35a8... | [425a7862-0aef-4f69-a4c8-8bd42151c9d4, bcdbf1f... | [2bfad9f4-5abd-48d0-8db3-a9cad9120413, 6cbb838... | [2c292047-b79a-4958-ab57-7bf7d7a22c92, 3cbd18a... |
4 | 4c277337d461a16aaf8f9760ddb8b44ef220e948a2341d... | 5 | a mask of duty.\n\nIn the midst of the descen... | 35 | [6e81f882f89dd5596e1925dd3ae8a4f0a0edcb55b35a8... | [d084d615-3584-4ec8-9931-90aa6075c764, 4b84859... | [6efdc42e-69a2-47c0-97ec-4b296cd16d5e] | [db8da02f-f889-4bb5-8e81-ab2a72e380bb] |
在 [9] 中
已复制!
from graphrag.config.enums import ModelType
from graphrag.config.models.language_model_config import LanguageModelConfig
from graphrag.language_model.manager import ModelManager

# Credentials and model names come from the environment; an unset variable
# raises KeyError here, which is the desired fail-fast behavior.
api_key = os.environ["GRAPHRAG_API_KEY"]
llm_model = os.environ["GRAPHRAG_LLM_MODEL"]
embedding_model = os.environ["GRAPHRAG_EMBEDDING_MODEL"]

# Chat model used to generate the final answer.
chat_config = LanguageModelConfig(
    api_key=api_key,
    type=ModelType.OpenAIChat,
    model=llm_model,
    max_retries=20,
)
chat_model = ModelManager().get_or_create_chat_model(
    name="local_search",
    model_type=ModelType.OpenAIChat,
    config=chat_config,
)

# tiktoken.encoding_for_model raises KeyError for model names not in its
# registry (e.g. Azure deployment names or newly released models); fall back
# to cl100k_base, the encoding used by current OpenAI chat/embedding models.
try:
    token_encoder = tiktoken.encoding_for_model(llm_model)
except KeyError:
    token_encoder = tiktoken.get_encoding("cl100k_base")

# Embedding model used to map the query to entities in the vector store.
embedding_config = LanguageModelConfig(
    api_key=api_key,
    type=ModelType.OpenAIEmbedding,
    model=embedding_model,
    max_retries=20,
)
text_embedder = ModelManager().get_or_create_embedding_model(
    name="local_search_embedding",
    model_type=ModelType.OpenAIEmbedding,
    config=embedding_config,
)
from graphrag.config.enums import ModelType from graphrag.config.models.language_model_config import LanguageModelConfig from graphrag.language_model.manager import ModelManager api_key = os.environ["GRAPHRAG_API_KEY"] llm_model = os.environ["GRAPHRAG_LLM_MODEL"] embedding_model = os.environ["GRAPHRAG_EMBEDDING_MODEL"] chat_config = LanguageModelConfig( api_key=api_key, type=ModelType.OpenAIChat, model=llm_model, max_retries=20, ) chat_model = ModelManager().get_or_create_chat_model( name="local_search", model_type=ModelType.OpenAIChat, config=chat_config, ) token_encoder = tiktoken.encoding_for_model(llm_model) embedding_config = LanguageModelConfig( api_key=api_key, type=ModelType.OpenAIEmbedding, model=embedding_model, max_retries=20, ) text_embedder = ModelManager().get_or_create_embedding_model( name="local_search_embedding", model_type=ModelType.OpenAIEmbedding, config=embedding_config, )
创建本地搜索上下文构建器¶
在 [10] 中
已复制!
# Assemble the mixed-context builder that, per query, selects entities,
# relationships, covariates, community reports and raw text units to fill
# the context window.
context_builder = LocalSearchMixedContext(
    community_reports=reports,
    text_units=text_units,
    entities=entities,
    relationships=relationships,
    # if you did not run covariates during indexing, set this to None
    covariates=covariates,
    entity_text_embeddings=description_embedding_store,
    embedding_vectorstore_key=EntityVectorStoreKey.ID,  # if the vectorstore uses entity title as ids, set this to EntityVectorStoreKey.TITLE
    text_embedder=text_embedder,
    token_encoder=token_encoder,
)
context_builder = LocalSearchMixedContext( community_reports=reports, text_units=text_units, entities=entities, relationships=relationships, # 如果您在索引期间未运行协变量,请将其设置为 None covariates=covariates, entity_text_embeddings=description_embedding_store, embedding_vectorstore_key=EntityVectorStoreKey.ID, # 如果向量存储使用实体标题作为 id,请将其设置为 EntityVectorStoreKey.TITLE text_embedder=text_embedder, token_encoder=token_encoder, )
创建本地搜索引擎¶
在 [11] 中
已复制!
# text_unit_prop: proportion of context window dedicated to related text units
# community_prop: proportion of context window dedicated to community reports.
# The remaining proportion is dedicated to entities and relationships. Sum of text_unit_prop and community_prop should be <= 1
# conversation_history_max_turns: maximum number of turns to include in the conversation history.
# conversation_history_user_turns_only: if True, only include user queries in the conversation history.
# top_k_mapped_entities: number of related entities to retrieve from the entity description embedding store.
# top_k_relationships: control the number of out-of-network relationships to pull into the context window.
# include_entity_rank: if True, include the entity rank in the entity table in the context window. Default entity rank = node degree.
# include_relationship_weight: if True, include the relationship weight in the context window.
# include_community_rank: if True, include the community rank in the context window.
# return_candidate_context: if True, return a set of dataframes containing all candidate entity/relationship/covariate records that
# could be relevant. Note that not all of these records will be included in the context window. The "in_context" column in these
# dataframes indicates whether the record is included in the context window.
# max_context_tokens: maximum number of tokens to use for the context window.
local_context_params = {
    "text_unit_prop": 0.5,
    "community_prop": 0.1,
    "conversation_history_max_turns": 5,
    "conversation_history_user_turns_only": True,
    "top_k_mapped_entities": 10,
    "top_k_relationships": 10,
    "include_entity_rank": True,
    "include_relationship_weight": True,
    "include_community_rank": False,
    "return_candidate_context": False,
    "embedding_vectorstore_key": EntityVectorStoreKey.ID,  # set this to EntityVectorStoreKey.TITLE if the vectorstore uses entity title as ids
    # FIX: build_context's parameter is named max_context_tokens (see its
    # signature); the old "max_tokens" key was silently absorbed by **kwargs,
    # so the intended 12k context cap was never applied.
    "max_context_tokens": 12_000,  # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)
}

model_params = {
    "max_tokens": 2_000,  # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 1000-1500)
    "temperature": 0.0,
}
# text_unit_prop: 专用于相关文本单元的上下文窗口的比例 # community_prop: 专用于社区报告的上下文窗口的比例。 # 剩余比例专用于实体和关系。 text_unit_prop 和 community_prop 的总和应 <= 1 # conversation_history_max_turns: 对话历史记录中包含的最大轮数。 # conversation_history_user_turns_only: 如果为 True,则仅在对话历史记录中包含用户查询。 # top_k_mapped_entities: 从实体描述嵌入存储中检索的相关实体数量。 # top_k_relationships: 控制拉入上下文窗口的非网络关系的數量。 # include_entity_rank: 如果为 True,则在上下文窗口的实体表中包含实体排名。 默认实体排名 = 节点度。 # include_relationship_weight: 如果为 True,则在上下文窗口中包含关系权重。 # include_community_rank: 如果为 True,则在上下文窗口中包含社区排名。 # return_candidate_context: 如果为 True,则返回一组包含所有候选实体/关系/协变量记录的数据帧,这些记录 # 可能相关。 请注意,并非所有这些记录都将包含在上下文窗口中。 这些 # 数据帧中的 "in_context" 列指示记录是否包含在上下文窗口中。 # max_tokens: 上下文窗口使用的最大 token 数量。 local_context_params = { "text_unit_prop": 0.5, "community_prop": 0.1, "conversation_history_max_turns": 5, "conversation_history_user_turns_only": True, "top_k_mapped_entities": 10, "top_k_relationships": 10, "include_entity_rank": True, "include_relationship_weight": True, "include_community_rank": False, "return_candidate_context": False, "embedding_vectorstore_key": EntityVectorStoreKey.ID, # 如果向量存储使用实体标题作为 ID,则将其设置为 EntityVectorStoreKey.TITLE "max_tokens": 12_000, # 根据模型上的 token 限制更改此值(如果您使用的模型具有 8k 限制,则一个好的设置可以是 5000) } model_params = { "max_tokens": 2_000, # 根据模型上的 token 限制更改此值(如果您使用的模型具有 8k 限制,则一个好的设置可以是 1000=1500) "temperature": 0.0, }
在 [12] 中
已复制!
# Wire the model, context builder and parameter dicts into the local search
# engine; model_params are forwarded to the LLM call, context_builder_params
# to context_builder.build_context on every query.
search_engine = LocalSearch(
    model=chat_model,
    context_builder=context_builder,
    token_encoder=token_encoder,
    model_params=model_params,
    context_builder_params=local_context_params,
    response_type="multiple paragraphs",  # free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report
)
search_engine = LocalSearch( model=chat_model, context_builder=context_builder, token_encoder=token_encoder, model_params=model_params, context_builder_params=local_context_params, response_type="multiple paragraphs", # 描述响应类型和格式的自由文本,可以是任何内容,例如:优先级列表、单个段落、多个段落、多页报告 )
在示例查询上运行本地搜索¶
在 [13] 中
已复制!
# Top-level await works inside a notebook; in a plain script, wrap this in
# asyncio.run(...) instead.
result = await search_engine.search("Tell me about Agent Mercer")
print(result.response)
result = await search_engine.search("告诉我关于 Agent Mercer 的信息") print(result.response)
--------------------------------------------------------------------------- AuthenticationError Traceback (most recent call last) Cell In[13], line 1 ----> 1 result = await search_engine.search("Tell me about Agent Mercer") 2 print(result.response) File ~/work/graphrag/graphrag/graphrag/query/structured_search/local_search/search.py:63, in LocalSearch.search(self, query, conversation_history, **kwargs) 61 search_prompt = "" 62 llm_calls, prompt_tokens, output_tokens = {}, {}, {} ---> 63 context_result = self.context_builder.build_context( 64 query=query, 65 conversation_history=conversation_history, 66 **kwargs, 67 **self.context_builder_params, 68 ) 69 llm_calls["build_context"] = context_result.llm_calls 70 prompt_tokens["build_context"] = context_result.prompt_tokens File ~/work/graphrag/graphrag/graphrag/query/structured_search/local_search/mixed_context.py:139, in LocalSearchMixedContext.build_context(self, query, conversation_history, include_entity_names, exclude_entity_names, conversation_history_max_turns, conversation_history_user_turns_only, max_context_tokens, text_unit_prop, community_prop, top_k_mapped_entities, top_k_relationships, include_community_rank, include_entity_rank, rank_description, include_relationship_weight, relationship_ranking_attribute, return_candidate_context, use_community_summary, min_community_rank, community_context_name, column_delimiter, **kwargs) 134 pre_user_questions = "\n".join( 135 conversation_history.get_user_turns(conversation_history_max_turns) 136 ) 137 query = f"{query}\n{pre_user_questions}" --> 139 selected_entities = map_query_to_entities( 140 query=query, 141 text_embedding_vectorstore=self.entity_text_embeddings, 142 text_embedder=self.text_embedder, 143 all_entities_dict=self.entities, 144 embedding_vectorstore_key=self.embedding_vectorstore_key, 145 include_entity_names=include_entity_names, 146 exclude_entity_names=exclude_entity_names, 147 k=top_k_mapped_entities, 148 oversample_scaler=2, 149 ) 151 # build 
context 152 final_context = list[str]() File ~/work/graphrag/graphrag/graphrag/query/context_builder/entity_extraction.py:58, in map_query_to_entities(query, text_embedding_vectorstore, text_embedder, all_entities_dict, embedding_vectorstore_key, include_entity_names, exclude_entity_names, k, oversample_scaler) 54 matched_entities = [] 55 if query != "": 56 # get entities with highest semantic similarity to query 57 # oversample to account for excluded entities ---> 58 search_results = text_embedding_vectorstore.similarity_search_by_text( 59 text=query, 60 text_embedder=lambda t: text_embedder.embed(t), 61 k=k * oversample_scaler, 62 ) 63 for result in search_results: 64 if embedding_vectorstore_key == EntityVectorStoreKey.ID and isinstance( 65 result.document.id, str 66 ): File ~/work/graphrag/graphrag/graphrag/vector_stores/lancedb.py:134, in LanceDBVectorStore.similarity_search_by_text(self, text, text_embedder, k, **kwargs) 130 def similarity_search_by_text( 131 self, text: str, text_embedder: TextEmbedder, k: int = 10, **kwargs: Any 132 ) -> list[VectorStoreSearchResult]: 133 """Perform a similarity search using a given input text.""" --> 134 query_embedding = text_embedder(text) 135 if query_embedding: 136 return self.similarity_search_by_vector(query_embedding, k) File ~/work/graphrag/graphrag/graphrag/query/context_builder/entity_extraction.py:60, in map_query_to_entities.<locals>.<lambda>(t) 54 matched_entities = [] 55 if query != "": 56 # get entities with highest semantic similarity to query 57 # oversample to account for excluded entities 58 search_results = text_embedding_vectorstore.similarity_search_by_text( 59 text=query, ---> 60 text_embedder=lambda t: text_embedder.embed(t), 61 k=k * oversample_scaler, 62 ) 63 for result in search_results: 64 if embedding_vectorstore_key == EntityVectorStoreKey.ID and isinstance( 65 result.document.id, str 66 ): File ~/work/graphrag/graphrag/graphrag/language_model/providers/fnllm/models.py:237, in 
OpenAIEmbeddingFNLLM.embed(self, text, **kwargs) 225 def embed(self, text: str, **kwargs) -> list[float]: 226 """ 227 Embed the given text using the Model. 228 (...) 235 The embeddings of the text. 236 """ --> 237 return run_coroutine_sync(self.aembed(text, **kwargs)) File ~/work/graphrag/graphrag/graphrag/language_model/providers/fnllm/utils.py:127, in run_coroutine_sync(coroutine) 125 _thr.start() 126 future = asyncio.run_coroutine_threadsafe(coroutine, _loop) --> 127 return future.result() File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/concurrent/futures/_base.py:456, in Future.result(self, timeout) 454 raise CancelledError() 455 elif self._state == FINISHED: --> 456 return self.__get_result() 457 else: 458 raise TimeoutError() File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/concurrent/futures/_base.py:401, in Future.__get_result(self) 399 if self._exception: 400 try: --> 401 raise self._exception 402 finally: 403 # Break a reference cycle with the exception in self._exception 404 self = None File ~/work/graphrag/graphrag/graphrag/language_model/providers/fnllm/models.py:204, in OpenAIEmbeddingFNLLM.aembed(self, text, **kwargs) 192 async def aembed(self, text: str, **kwargs) -> list[float]: 193 """ 194 Embed the given text using the Model. 195 (...) 202 The embeddings of the text. 
203 """ --> 204 response = await self.model([text], **kwargs) 205 if response.output.embeddings is None: 206 msg = "No embeddings found in response" File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/fnllm/base/base_llm.py:144, in BaseLLM.__call__(self, prompt, **kwargs) 142 try: 143 prompt, kwargs = self._rewrite_input(prompt, kwargs) --> 144 return await self._decorated_target(prompt, **kwargs) 145 except BaseException as e: 146 stack_trace = traceback.format_exc() File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/fnllm/base/services/rate_limiter.py:75, in RateLimiter.decorate.<locals>.invoke(prompt, **args) 73 async with self._limiter.use(manifest): 74 await self._events.on_limit_acquired(manifest) ---> 75 result = await delegate(prompt, **args) 76 finally: 77 await self._events.on_limit_released(manifest) File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/fnllm/base/base_llm.py:126, in BaseLLM._decorator_target(self, prompt, **kwargs) 121 """Target for the decorator chain. 122 123 Leave signature alone as prompt, kwargs. 
124 """ 125 await self._events.on_execute_llm() --> 126 output = await self._execute_llm(prompt, kwargs) 127 result = LLMOutput(output=output) 128 await self._inject_usage(result) File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/fnllm/openai/llm/openai_embeddings_llm.py:126, in OpenAIEmbeddingsLLMImpl._execute_llm(self, prompt, kwargs) 121 local_model_parameters = kwargs.get("model_parameters") 122 embeddings_parameters = self._build_embeddings_parameters( 123 local_model_parameters 124 ) --> 126 result = await self._client.embeddings.create( 127 input=prompt, 128 **embeddings_parameters, 129 ) 130 usage: LLMUsageMetrics | None = None 131 if result.usage: File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/resources/embeddings.py:243, in AsyncEmbeddings.create(self, input, model, dimensions, encoding_format, user, extra_headers, extra_query, extra_body, timeout) 237 embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] 238 base64.b64decode(data), dtype="float32" 239 ).tolist() 241 return obj --> 243 return await self._post( 244 "/embeddings", 245 body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams), 246 options=make_request_options( 247 extra_headers=extra_headers, 248 extra_query=extra_query, 249 extra_body=extra_body, 250 timeout=timeout, 251 post_parser=parser, 252 ), 253 cast_to=CreateEmbeddingResponse, 254 ) File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1767, in AsyncAPIClient.post(self, path, cast_to, body, files, options, stream, stream_cls) 1753 async def post( 1754 self, 1755 path: str, (...) 
1762 stream_cls: type[_AsyncStreamT] | None = None, 1763 ) -> ResponseT | _AsyncStreamT: 1764 opts = FinalRequestOptions.construct( 1765 method="post", url=path, json_data=body, files=await async_to_httpx_files(files), **options 1766 ) -> 1767 return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1461, in AsyncAPIClient.request(self, cast_to, options, stream, stream_cls, remaining_retries) 1458 else: 1459 retries_taken = 0 -> 1461 return await self._request( 1462 cast_to=cast_to, 1463 options=options, 1464 stream=stream, 1465 stream_cls=stream_cls, 1466 retries_taken=retries_taken, 1467 ) File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1562, in AsyncAPIClient._request(self, cast_to, options, stream, stream_cls, retries_taken) 1559 await err.response.aread() 1561 log.debug("Re-raising status error") -> 1562 raise self._make_status_error_from_response(err.response) from None 1564 return await self._process_response( 1565 cast_to=cast_to, 1566 options=options, (...) 1570 retries_taken=retries_taken, 1571 ) AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************zWYA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
在 [14] 中
已复制!
# Second example query against the same engine (notebook top-level await).
question = "Tell me about Dr. Jordan Hayes"
result = await search_engine.search(question)
print(result.response)
question = "告诉我关于 Dr. Jordan Hayes 的信息" result = await search_engine.search(question) print(result.response)
--------------------------------------------------------------------------- AuthenticationError Traceback (most recent call last) Cell In[14], line 2 1 question = "Tell me about Dr. Jordan Hayes" ----> 2 result = await search_engine.search(question) 3 print(result.response) File ~/work/graphrag/graphrag/graphrag/query/structured_search/local_search/search.py:63, in LocalSearch.search(self, query, conversation_history, **kwargs) 61 search_prompt = "" 62 llm_calls, prompt_tokens, output_tokens = {}, {}, {} ---> 63 context_result = self.context_builder.build_context( 64 query=query, 65 conversation_history=conversation_history, 66 **kwargs, 67 **self.context_builder_params, 68 ) 69 llm_calls["build_context"] = context_result.llm_calls 70 prompt_tokens["build_context"] = context_result.prompt_tokens File ~/work/graphrag/graphrag/graphrag/query/structured_search/local_search/mixed_context.py:139, in LocalSearchMixedContext.build_context(self, query, conversation_history, include_entity_names, exclude_entity_names, conversation_history_max_turns, conversation_history_user_turns_only, max_context_tokens, text_unit_prop, community_prop, top_k_mapped_entities, top_k_relationships, include_community_rank, include_entity_rank, rank_description, include_relationship_weight, relationship_ranking_attribute, return_candidate_context, use_community_summary, min_community_rank, community_context_name, column_delimiter, **kwargs) 134 pre_user_questions = "\n".join( 135 conversation_history.get_user_turns(conversation_history_max_turns) 136 ) 137 query = f"{query}\n{pre_user_questions}" --> 139 selected_entities = map_query_to_entities( 140 query=query, 141 text_embedding_vectorstore=self.entity_text_embeddings, 142 text_embedder=self.text_embedder, 143 all_entities_dict=self.entities, 144 embedding_vectorstore_key=self.embedding_vectorstore_key, 145 include_entity_names=include_entity_names, 146 exclude_entity_names=exclude_entity_names, 147 k=top_k_mapped_entities, 148 
oversample_scaler=2, 149 ) 151 # build context 152 final_context = list[str]() File ~/work/graphrag/graphrag/graphrag/query/context_builder/entity_extraction.py:58, in map_query_to_entities(query, text_embedding_vectorstore, text_embedder, all_entities_dict, embedding_vectorstore_key, include_entity_names, exclude_entity_names, k, oversample_scaler) 54 matched_entities = [] 55 if query != "": 56 # get entities with highest semantic similarity to query 57 # oversample to account for excluded entities ---> 58 search_results = text_embedding_vectorstore.similarity_search_by_text( 59 text=query, 60 text_embedder=lambda t: text_embedder.embed(t), 61 k=k * oversample_scaler, 62 ) 63 for result in search_results: 64 if embedding_vectorstore_key == EntityVectorStoreKey.ID and isinstance( 65 result.document.id, str 66 ): File ~/work/graphrag/graphrag/graphrag/vector_stores/lancedb.py:134, in LanceDBVectorStore.similarity_search_by_text(self, text, text_embedder, k, **kwargs) 130 def similarity_search_by_text( 131 self, text: str, text_embedder: TextEmbedder, k: int = 10, **kwargs: Any 132 ) -> list[VectorStoreSearchResult]: 133 """Perform a similarity search using a given input text.""" --> 134 query_embedding = text_embedder(text) 135 if query_embedding: 136 return self.similarity_search_by_vector(query_embedding, k) File ~/work/graphrag/graphrag/graphrag/query/context_builder/entity_extraction.py:60, in map_query_to_entities.<locals>.<lambda>(t) 54 matched_entities = [] 55 if query != "": 56 # get entities with highest semantic similarity to query 57 # oversample to account for excluded entities 58 search_results = text_embedding_vectorstore.similarity_search_by_text( 59 text=query, ---> 60 text_embedder=lambda t: text_embedder.embed(t), 61 k=k * oversample_scaler, 62 ) 63 for result in search_results: 64 if embedding_vectorstore_key == EntityVectorStoreKey.ID and isinstance( 65 result.document.id, str 66 ): File 
~/work/graphrag/graphrag/graphrag/language_model/providers/fnllm/models.py:237, in OpenAIEmbeddingFNLLM.embed(self, text, **kwargs) 225 def embed(self, text: str, **kwargs) -> list[float]: 226 """ 227 Embed the given text using the Model. 228 (...) 235 The embeddings of the text. 236 """ --> 237 return run_coroutine_sync(self.aembed(text, **kwargs)) File ~/work/graphrag/graphrag/graphrag/language_model/providers/fnllm/utils.py:127, in run_coroutine_sync(coroutine) 125 _thr.start() 126 future = asyncio.run_coroutine_threadsafe(coroutine, _loop) --> 127 return future.result() File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/concurrent/futures/_base.py:456, in Future.result(self, timeout) 454 raise CancelledError() 455 elif self._state == FINISHED: --> 456 return self.__get_result() 457 else: 458 raise TimeoutError() File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/concurrent/futures/_base.py:401, in Future.__get_result(self) 399 if self._exception: 400 try: --> 401 raise self._exception 402 finally: 403 # Break a reference cycle with the exception in self._exception 404 self = None File ~/work/graphrag/graphrag/graphrag/language_model/providers/fnllm/models.py:204, in OpenAIEmbeddingFNLLM.aembed(self, text, **kwargs) 192 async def aembed(self, text: str, **kwargs) -> list[float]: 193 """ 194 Embed the given text using the Model. 195 (...) 202 The embeddings of the text. 
203 """ --> 204 response = await self.model([text], **kwargs) 205 if response.output.embeddings is None: 206 msg = "No embeddings found in response" File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/fnllm/base/base_llm.py:144, in BaseLLM.__call__(self, prompt, **kwargs) 142 try: 143 prompt, kwargs = self._rewrite_input(prompt, kwargs) --> 144 return await self._decorated_target(prompt, **kwargs) 145 except BaseException as e: 146 stack_trace = traceback.format_exc() File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/fnllm/base/services/rate_limiter.py:75, in RateLimiter.decorate.<locals>.invoke(prompt, **args) 73 async with self._limiter.use(manifest): 74 await self._events.on_limit_acquired(manifest) ---> 75 result = await delegate(prompt, **args) 76 finally: 77 await self._events.on_limit_released(manifest) File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/fnllm/base/base_llm.py:126, in BaseLLM._decorator_target(self, prompt, **kwargs) 121 """Target for the decorator chain. 122 123 Leave signature alone as prompt, kwargs. 
124 """ 125 await self._events.on_execute_llm() --> 126 output = await self._execute_llm(prompt, kwargs) 127 result = LLMOutput(output=output) 128 await self._inject_usage(result) File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/fnllm/openai/llm/openai_embeddings_llm.py:126, in OpenAIEmbeddingsLLMImpl._execute_llm(self, prompt, kwargs) 121 local_model_parameters = kwargs.get("model_parameters") 122 embeddings_parameters = self._build_embeddings_parameters( 123 local_model_parameters 124 ) --> 126 result = await self._client.embeddings.create( 127 input=prompt, 128 **embeddings_parameters, 129 ) 130 usage: LLMUsageMetrics | None = None 131 if result.usage: File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/resources/embeddings.py:243, in AsyncEmbeddings.create(self, input, model, dimensions, encoding_format, user, extra_headers, extra_query, extra_body, timeout) 237 embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] 238 base64.b64decode(data), dtype="float32" 239 ).tolist() 241 return obj --> 243 return await self._post( 244 "/embeddings", 245 body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams), 246 options=make_request_options( 247 extra_headers=extra_headers, 248 extra_query=extra_query, 249 extra_body=extra_body, 250 timeout=timeout, 251 post_parser=parser, 252 ), 253 cast_to=CreateEmbeddingResponse, 254 ) File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1767, in AsyncAPIClient.post(self, path, cast_to, body, files, options, stream, stream_cls) 1753 async def post( 1754 self, 1755 path: str, (...) 
1762 stream_cls: type[_AsyncStreamT] | None = None, 1763 ) -> ResponseT | _AsyncStreamT: 1764 opts = FinalRequestOptions.construct( 1765 method="post", url=path, json_data=body, files=await async_to_httpx_files(files), **options 1766 ) -> 1767 return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1461, in AsyncAPIClient.request(self, cast_to, options, stream, stream_cls, remaining_retries) 1458 else: 1459 retries_taken = 0 -> 1461 return await self._request( 1462 cast_to=cast_to, 1463 options=options, 1464 stream=stream, 1465 stream_cls=stream_cls, 1466 retries_taken=retries_taken, 1467 ) File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1562, in AsyncAPIClient._request(self, cast_to, options, stream, stream_cls, retries_taken) 1559 await err.response.aread() 1561 log.debug("Re-raising status error") -> 1562 raise self._make_status_error_from_response(err.response) from None 1564 return await self._process_response( 1565 cast_to=cast_to, 1566 options=options, (...) 1570 retries_taken=retries_taken, 1571 ) AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************zWYA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
检查用于生成响应的上下文数据¶
在 [15] 中
已复制!
result.context_data["entities"].head()
result.context_data["entities"].head()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[15], line 1 ----> 1 result.context_data["entities"].head() NameError: name 'result' is not defined
在 [16] 中
已复制!
result.context_data["relationships"].head()
result.context_data["relationships"].head()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[16], line 1 ----> 1 result.context_data["relationships"].head() NameError: name 'result' is not defined
在 [17] 中
已复制!
if "reports" in result.context_data:
result.context_data["reports"].head()
if "reports" in result.context_data: result.context_data["reports"].head()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[17], line 1 ----> 1 if "reports" in result.context_data: 2 result.context_data["reports"].head() NameError: name 'result' is not defined
在 [18] 中
已复制!
result.context_data["sources"].head()
result.context_data["sources"].head()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[18], line 1 ----> 1 result.context_data["sources"].head() NameError: name 'result' is not defined
在 [19] 中
已复制!
if "claims" in result.context_data:
print(result.context_data["claims"].head())
if "claims" in result.context_data: print(result.context_data["claims"].head())
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[19], line 1 ----> 1 if "claims" in result.context_data: 2 print(result.context_data["claims"].head()) NameError: name 'result' is not defined
问题生成¶
此函数接受用户查询列表并生成下一个候选问题。
在 [20] 中
已复制!
question_generator = LocalQuestionGen(
model=chat_model,
context_builder=context_builder,
token_encoder=token_encoder,
model_params=model_params,
context_builder_params=local_context_params,
)
question_generator = LocalQuestionGen( model=chat_model, context_builder=context_builder, token_encoder=token_encoder, model_params=model_params, context_builder_params=local_context_params, )
在 [21] 中
已复制!
question_history = [
"Tell me about Agent Mercer",
"What happens in Dulce military base?",
]
candidate_questions = await question_generator.agenerate(
question_history=question_history, context_data=None, question_count=5
)
print(candidate_questions.response)
question_history = [ "Tell me about Agent Mercer", "What happens in Dulce military base?", ] candidate_questions = await question_generator.agenerate( question_history=question_history, context_data=None, question_count=5 ) print(candidate_questions.response)
--------------------------------------------------------------------------- AuthenticationError Traceback (most recent call last) Cell In[21], line 5 1 question_history = [ 2 "Tell me about Agent Mercer", 3 "What happens in Dulce military base?", 4 ] ----> 5 candidate_questions = await question_generator.agenerate( 6 question_history=question_history, context_data=None, question_count=5 7 ) 8 print(candidate_questions.response) File ~/work/graphrag/graphrag/graphrag/query/question_gen/local_gen.py:80, in LocalQuestionGen.agenerate(self, question_history, context_data, question_count, **kwargs) 74 conversation_history = ConversationHistory.from_list(history) 76 if context_data is None: 77 # generate context data based on the question history 78 result = cast( 79 "ContextBuilderResult", ---> 80 self.context_builder.build_context( 81 query=question_text, 82 conversation_history=conversation_history, 83 **kwargs, 84 **self.context_builder_params, 85 ), 86 ) 87 context_data = cast("str", result.context_chunks) 88 context_records = result.context_records File ~/work/graphrag/graphrag/graphrag/query/structured_search/local_search/mixed_context.py:139, in LocalSearchMixedContext.build_context(self, query, conversation_history, include_entity_names, exclude_entity_names, conversation_history_max_turns, conversation_history_user_turns_only, max_context_tokens, text_unit_prop, community_prop, top_k_mapped_entities, top_k_relationships, include_community_rank, include_entity_rank, rank_description, include_relationship_weight, relationship_ranking_attribute, return_candidate_context, use_community_summary, min_community_rank, community_context_name, column_delimiter, **kwargs) 134 pre_user_questions = "\n".join( 135 conversation_history.get_user_turns(conversation_history_max_turns) 136 ) 137 query = f"{query}\n{pre_user_questions}" --> 139 selected_entities = map_query_to_entities( 140 query=query, 141 text_embedding_vectorstore=self.entity_text_embeddings, 142 
text_embedder=self.text_embedder, 143 all_entities_dict=self.entities, 144 embedding_vectorstore_key=self.embedding_vectorstore_key, 145 include_entity_names=include_entity_names, 146 exclude_entity_names=exclude_entity_names, 147 k=top_k_mapped_entities, 148 oversample_scaler=2, 149 ) 151 # build context 152 final_context = list[str]() File ~/work/graphrag/graphrag/graphrag/query/context_builder/entity_extraction.py:58, in map_query_to_entities(query, text_embedding_vectorstore, text_embedder, all_entities_dict, embedding_vectorstore_key, include_entity_names, exclude_entity_names, k, oversample_scaler) 54 matched_entities = [] 55 if query != "": 56 # get entities with highest semantic similarity to query 57 # oversample to account for excluded entities ---> 58 search_results = text_embedding_vectorstore.similarity_search_by_text( 59 text=query, 60 text_embedder=lambda t: text_embedder.embed(t), 61 k=k * oversample_scaler, 62 ) 63 for result in search_results: 64 if embedding_vectorstore_key == EntityVectorStoreKey.ID and isinstance( 65 result.document.id, str 66 ): File ~/work/graphrag/graphrag/graphrag/vector_stores/lancedb.py:134, in LanceDBVectorStore.similarity_search_by_text(self, text, text_embedder, k, **kwargs) 130 def similarity_search_by_text( 131 self, text: str, text_embedder: TextEmbedder, k: int = 10, **kwargs: Any 132 ) -> list[VectorStoreSearchResult]: 133 """Perform a similarity search using a given input text.""" --> 134 query_embedding = text_embedder(text) 135 if query_embedding: 136 return self.similarity_search_by_vector(query_embedding, k) File ~/work/graphrag/graphrag/graphrag/query/context_builder/entity_extraction.py:60, in map_query_to_entities.<locals>.<lambda>(t) 54 matched_entities = [] 55 if query != "": 56 # get entities with highest semantic similarity to query 57 # oversample to account for excluded entities 58 search_results = text_embedding_vectorstore.similarity_search_by_text( 59 text=query, ---> 60 text_embedder=lambda t: 
text_embedder.embed(t), 61 k=k * oversample_scaler, 62 ) 63 for result in search_results: 64 if embedding_vectorstore_key == EntityVectorStoreKey.ID and isinstance( 65 result.document.id, str 66 ): File ~/work/graphrag/graphrag/graphrag/language_model/providers/fnllm/models.py:237, in OpenAIEmbeddingFNLLM.embed(self, text, **kwargs) 225 def embed(self, text: str, **kwargs) -> list[float]: 226 """ 227 Embed the given text using the Model. 228 (...) 235 The embeddings of the text. 236 """ --> 237 return run_coroutine_sync(self.aembed(text, **kwargs)) File ~/work/graphrag/graphrag/graphrag/language_model/providers/fnllm/utils.py:127, in run_coroutine_sync(coroutine) 125 _thr.start() 126 future = asyncio.run_coroutine_threadsafe(coroutine, _loop) --> 127 return future.result() File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/concurrent/futures/_base.py:456, in Future.result(self, timeout) 454 raise CancelledError() 455 elif self._state == FINISHED: --> 456 return self.__get_result() 457 else: 458 raise TimeoutError() File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/concurrent/futures/_base.py:401, in Future.__get_result(self) 399 if self._exception: 400 try: --> 401 raise self._exception 402 finally: 403 # Break a reference cycle with the exception in self._exception 404 self = None File ~/work/graphrag/graphrag/graphrag/language_model/providers/fnllm/models.py:204, in OpenAIEmbeddingFNLLM.aembed(self, text, **kwargs) 192 async def aembed(self, text: str, **kwargs) -> list[float]: 193 """ 194 Embed the given text using the Model. 195 (...) 202 The embeddings of the text. 
203 """ --> 204 response = await self.model([text], **kwargs) 205 if response.output.embeddings is None: 206 msg = "No embeddings found in response" File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/fnllm/base/base_llm.py:144, in BaseLLM.__call__(self, prompt, **kwargs) 142 try: 143 prompt, kwargs = self._rewrite_input(prompt, kwargs) --> 144 return await self._decorated_target(prompt, **kwargs) 145 except BaseException as e: 146 stack_trace = traceback.format_exc() File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/fnllm/base/services/rate_limiter.py:75, in RateLimiter.decorate.<locals>.invoke(prompt, **args) 73 async with self._limiter.use(manifest): 74 await self._events.on_limit_acquired(manifest) ---> 75 result = await delegate(prompt, **args) 76 finally: 77 await self._events.on_limit_released(manifest) File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/fnllm/base/base_llm.py:126, in BaseLLM._decorator_target(self, prompt, **kwargs) 121 """Target for the decorator chain. 122 123 Leave signature alone as prompt, kwargs. 
124 """ 125 await self._events.on_execute_llm() --> 126 output = await self._execute_llm(prompt, kwargs) 127 result = LLMOutput(output=output) 128 await self._inject_usage(result) File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/fnllm/openai/llm/openai_embeddings_llm.py:126, in OpenAIEmbeddingsLLMImpl._execute_llm(self, prompt, kwargs) 121 local_model_parameters = kwargs.get("model_parameters") 122 embeddings_parameters = self._build_embeddings_parameters( 123 local_model_parameters 124 ) --> 126 result = await self._client.embeddings.create( 127 input=prompt, 128 **embeddings_parameters, 129 ) 130 usage: LLMUsageMetrics | None = None 131 if result.usage: File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/resources/embeddings.py:243, in AsyncEmbeddings.create(self, input, model, dimensions, encoding_format, user, extra_headers, extra_query, extra_body, timeout) 237 embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] 238 base64.b64decode(data), dtype="float32" 239 ).tolist() 241 return obj --> 243 return await self._post( 244 "/embeddings", 245 body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams), 246 options=make_request_options( 247 extra_headers=extra_headers, 248 extra_query=extra_query, 249 extra_body=extra_body, 250 timeout=timeout, 251 post_parser=parser, 252 ), 253 cast_to=CreateEmbeddingResponse, 254 ) File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1767, in AsyncAPIClient.post(self, path, cast_to, body, files, options, stream, stream_cls) 1753 async def post( 1754 self, 1755 path: str, (...) 
1762 stream_cls: type[_AsyncStreamT] | None = None, 1763 ) -> ResponseT | _AsyncStreamT: 1764 opts = FinalRequestOptions.construct( 1765 method="post", url=path, json_data=body, files=await async_to_httpx_files(files), **options 1766 ) -> 1767 return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1461, in AsyncAPIClient.request(self, cast_to, options, stream, stream_cls, remaining_retries) 1458 else: 1459 retries_taken = 0 -> 1461 return await self._request( 1462 cast_to=cast_to, 1463 options=options, 1464 stream=stream, 1465 stream_cls=stream_cls, 1466 retries_taken=retries_taken, 1467 ) File ~/.cache/pypoetry/virtualenvs/graphrag-F2jvqev7-py3.11/lib/python3.11/site-packages/openai/_base_client.py:1562, in AsyncAPIClient._request(self, cast_to, options, stream, stream_cls, retries_taken) 1559 await err.response.aread() 1561 log.debug("Re-raising status error") -> 1562 raise self._make_status_error_from_response(err.response) from None 1564 return await self._process_response( 1565 cast_to=cast_to, 1566 options=options, (...) 1570 retries_taken=retries_taken, 1571 ) AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************zWYA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}