Context expansion
Parameters related to context expansion:
- score_threshold: knowledge relevance threshold
- k: number of chunks to retrieve
- chunk_conent: whether context expansion is enabled (the misspelling comes from the source code itself)
- chunk_size: maximum context length
Its main job is to take the knowledge-base chunks most similar to the current query and extend them with surrounding content from the same source document; the expanded text is then combined with the query according to the prompt template and sent to the model to produce an answer.
- Obtaining the query embedding:
faiss.py
def similarity_search_with_score(
        self, query: str, k: int = 4
) -> List[Tuple[Document, float]]:
    """Return docs most similar to query.
    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
    Returns:
        List of Documents most similar to the query and score for each
    """
    embedding = self.embedding_function(query)
    docs = self.similarity_search_with_score_by_vector(embedding, k)
    return docs
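A minimal call sketch (vector_store is a hypothetical MyFAISS instance; the store construction is assumed). Note that despite the annotated return type, the MyFAISS override below returns Documents with the score stored in doc.metadata rather than (Document, score) tuples:

# hypothetical: embed the query and retrieve the 4 most similar chunks
docs = vector_store.similarity_search_with_score("如何設(shè)置score_threshold?", k=4)
for doc in docs:
    print(doc.metadata["score"], doc.page_content[:50])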
- Context generation:
MyFAISS.py
def seperate_list(self, ls: List[int]) -> List[List[int]]:
    # TODO: also check whether adjacent indices belong to the same document
    lists = []
    ls1 = [ls[0]]
    for i in range(1, len(ls)):
        if ls[i - 1] + 1 == ls[i]:
            ls1.append(ls[i])
        else:
            lists.append(ls1)
            ls1 = [ls[i]]
    lists.append(ls1)
    return lists
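seperate_list (the misspelling is in the source) splits a sorted index list into runs of consecutive indices. A quick demonstration (it is a method on MyFAISS, shown standalone here):

# runs of consecutive indices become one sub-list each
seperate_list([3, 4, 5, 9, 10, 12])
# -> [[3, 4, 5], [9, 10], [12]]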
def similarity_search_with_score_by_vector(
        self, embedding: List[float], k: int = 4
) -> List[Document]:
    faiss = dependable_faiss_import()
    # shape (1, 1024)
    vector = np.array([embedding], dtype=np.float32)
    # False by default
    if self._normalize_L2:
        faiss.normalize_L2(vector)
    # both have shape (1, k); this retrieves the top-k chunks most similar to the query
    scores, indices = self.index.search(vector, k)
    docs = []
    id_set = set()
    store_len = len(self.index_to_docstore_id)
    rearrange_id_list = False
    # iterate over the indices of the k most similar chunks
    # k is the first filter, score is the second
    for j, i in enumerate(indices[0]):
        if i == -1 or 0 < self.score_threshold < scores[0][j]:
            # This happens when not enough docs are returned.
            continue
        if i in self.index_to_docstore_id:
            _id = self.index_to_docstore_id[i]
            # carry on with the steps below
        else:
            continue
        # index -> id -> content
        doc = self.docstore.search(_id)
        if (not self.chunk_conent) or ("context_expand" in doc.metadata and not doc.metadata["context_expand"]):
            # if the matched text does not need context expansion, run the following
            if not isinstance(doc, Document):
                raise ValueError(f"Could not find document for id {_id}, got {doc}")
            doc.metadata["score"] = int(scores[0][j])
            docs.append(doc)
            continue
        # what gets stored here are actually indices
        id_set.add(i)
        docs_len = len(doc.page_content)
        # this k shadows the outer parameter k, which is bad practice
        # each chunk is one sentence produced by splitting; i is the chunk's position in the
        # whole store and store_len is the store size, so this k is the maximum number of
        # chunks the expansion may span
        for k in range(1, max(i, store_len - i)):
            break_flag = False
            if "context_expand_method" in doc.metadata and doc.metadata["context_expand_method"] == "forward":
                expand_range = [i + k]
            elif "context_expand_method" in doc.metadata and doc.metadata["context_expand_method"] == "backward":
                expand_range = [i - k]
            else:
                # i=4922, k=1 -> [4923, 4921]
                expand_range = [i + k, i - k]
            for l in expand_range:
                # make sure expansion does not introduce duplicates
                if l not in id_set and 0 <= l < len(self.index_to_docstore_id):
                    _id0 = self.index_to_docstore_id[l]
                    doc0 = self.docstore.search(_id0)
                    # stop once the accumulated length exceeds chunk_size or the chunk crosses into another file
                    # this check is flawed: expansion stops as soon as one side crosses a file
                    # boundary, when it should only stop once both sides have crossed
                    if docs_len + len(doc0.page_content) > self.chunk_size or doc0.metadata["source"] != \
                            doc.metadata["source"]:
                        break_flag = True
                        break
                    elif doc0.metadata["source"] == doc.metadata["source"]:
                        docs_len += len(doc0.page_content)
                        id_set.add(l)
                        rearrange_id_list = True
            if break_flag:
                break
    # if no context expansion happened (not needed, or not possible)
    if (not self.chunk_conent) or (not rearrange_id_list):
        return docs
    if len(id_set) == 0 and self.score_threshold > 0:
        return []
    id_list = sorted(list(id_set))
    # consecutive indices always belong to the same document, but non-consecutive ones may too
    # returns a nested list: the outer level is runs of consecutive indices, the inner level the indices themselves
    id_lists = self.seperate_list(id_list)
    for id_seq in id_lists:
        for id in id_seq:
            if id == id_seq[0]:
                _id = self.index_to_docstore_id[id]
                # doc = self.docstore.search(_id)
                doc = copy.deepcopy(self.docstore.search(_id))
            else:
                _id0 = self.index_to_docstore_id[id]
                doc0 = self.docstore.search(_id0)
                doc.page_content += " " + doc0.page_content
        if not isinstance(doc, Document):
            raise ValueError(f"Could not find document for id {_id}, got {doc}")
        # indices holds the positions of the relevant chunks
        # since several consecutive ids may have been merged, the score of a seq is the minimum
        # score over all of its chunks that appear in the top-k results
        doc_score = min([scores[0][id] for id in [indices[0].tolist().index(i) for i in id_seq if i in indices[0]]])
        doc.metadata["score"] = int(doc_score)
        docs.append(doc)
    # note: docs is not sorted by similarity here; you can add a sort yourself
    return docs
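As the final comment notes, the merged docs come back unordered; a one-line fix (a sketch, assuming the default L2 index where a smaller score means a closer match):

# sort so the most similar (smallest L2 distance) documents come first
docs.sort(key=lambda d: d.metadata["score"])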
Because the similarity handling has some issues with bidirectional expansion, I modified the algorithm:
def similarity_search_with_score_by_vector(
        self, embedding: List[float], k: int = 4
) -> Tuple[List[Document], List[str]]:
    faiss = dependable_faiss_import()
    # shape (1, 1024)
    vector = np.array([embedding], dtype=np.float32)
    # False by default
    if self._normalize_L2:
        faiss.normalize_L2(vector)
    # both have shape (1, k)
    scores, indices = self.index.search(vector, k)
    docs = []
    id_set = set()
    store_len = len(self.index_to_docstore_id)
    rearrange_id_list = False
    # holds the key sentences
    keysentences = []
    # iterate over the indices of the k most similar chunks
    # top-k is the first filter, score is the second
    for j, i in enumerate(indices[0]):
        if i == -1 or 0 < self.score_threshold < scores[0][j]:
            # This happens when not enough docs are returned.
            continue
        if i in self.index_to_docstore_id:
            _id = self.index_to_docstore_id[i]
            # carry on with the steps below
        else:
            continue
        # index -> id -> content
        doc = self.docstore.search(_id)
        if (not self.chunk_conent) or ("context_expand" in doc.metadata and not doc.metadata["context_expand"]):
            # if the matched text does not need context expansion, run the following
            if not isinstance(doc, Document):
                raise ValueError(f"Could not find document for id {_id}, got {doc}")
            doc.metadata["score"] = int(scores[0][j])
            docs.append(doc)
            continue
        # what gets stored here are actually indices
        id_set.add(i)
        docs_len = len(doc.page_content.strip())
        # this k still shadows the outer parameter k; as before, it is the maximum number
        # of chunks the expansion may span
        for k in range(1, max(i, store_len - i)):
            single_break_flag = 0  # recorded below but not otherwise used
            double_break_flag = 0
            if "context_expand_method" in doc.metadata and doc.metadata["context_expand_method"] == "forward":
                expand_range = [i + k]
            elif "context_expand_method" in doc.metadata and doc.metadata["context_expand_method"] == "backward":
                expand_range = [i - k]
            else:
                # i=4922, k=1 -> [4923, 4921]
                expand_range = [i + k, i - k]
            for l in expand_range:
                # make sure expansion does not introduce duplicates
                if l not in id_set and 0 <= l < len(self.index_to_docstore_id):
                    _id0 = self.index_to_docstore_id[l]
                    doc0 = self.docstore.search(_id0)
                    # unlike the original version, one side crossing a file boundary no longer
                    # terminates the whole expansion: only exceeding chunk_size, or both sides
                    # crossing a boundary in the same round, drives double_break_flag to 2
                    if docs_len + len(doc0.page_content.strip()) > self.chunk_size or doc0.metadata["source"] != \
                            doc.metadata["source"]:
                        single_break_flag = 1
                        if docs_len + len(doc0.page_content.strip()) > self.chunk_size:
                            double_break_flag = 2
                            break
                        else:
                            double_break_flag += 1
                    elif doc0.metadata["source"] == doc.metadata["source"]:
                        docs_len += len(doc0.page_content.strip())
                        id_set.add(l)
                        rearrange_id_list = True
            if double_break_flag == 2:
                break
    # if no context expansion happened (not needed, or not possible)
    if (not self.chunk_conent) or (not rearrange_id_list):
        return docs, keysentences
    if len(id_set) == 0 and self.score_threshold > 0:
        return [], []
    id_list = sorted(list(id_set))
    # consecutive indices always belong to the same document, but non-consecutive ones may too
    # returns a nested list: the outer level is runs of consecutive indices, the inner level the indices themselves
    id_lists = self.seperate_list(id_list)
    keyids = indices[0].tolist()
    filter_rate = 0.05
    for id_seq in id_lists:
        seqsentences = []
        for id_index, id in enumerate(id_seq):
            if id == id_seq[0]:
                _id = self.index_to_docstore_id[id]
                # doc = self.docstore.search(_id)
                doc = copy.deepcopy(self.docstore.search(_id))
                if id in keyids:
                    seqsentences.append({'key': doc.page_content.strip(), 'rate': id_index / len(id_seq)})
                    doc.page_content = "<b>" + doc.page_content.strip() + "</b>"
            else:
                _id0 = self.index_to_docstore_id[id]
                doc0 = self.docstore.search(_id0)
                if id in keyids:
                    seqsentences.append({'key': doc0.page_content.strip(), 'rate': id_index / len(id_seq)})
                    doc.page_content += " <b>" + doc0.page_content.strip() + "</b>"
                else:
                    doc.page_content += " " + doc0.page_content.strip()
        if not isinstance(doc, Document):
            raise ValueError(f"Could not find document for id {_id}, got {doc}")
        # indices holds the positions of the relevant chunks
        # since several consecutive ids may have been merged, the score of a seq is the minimum
        # score over all of its chunks that appear in the top-k results
        doc_score = min([scores[0][id] for id in [indices[0].tolist().index(i) for i in id_seq if i in indices[0]]])
        doc.metadata["score"] = int(doc_score)
        docs.append(doc)
        # keep the key sentences that sit late enough in the seq (relative position above
        # filter_rate); if even the latest one falls below the threshold, keep just that one
        seqsentences.sort(key=lambda data: data['rate'])
        if seqsentences[-1]['rate'] < filter_rate:
            keysentences.append(seqsentences[-1]['key'])
        else:
            keysentences += [seqsentence['key'] for seqsentence in seqsentences if seqsentence['rate'] > filter_rate]
    # unlike the original version, sort the docs by score (ascending L2 distance)
    docs.sort(key=lambda doc: doc.metadata['score'])
    return docs, keysentences
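Since the modified method now returns a (docs, keysentences) tuple instead of a plain list, its callers need a matching change; a minimal sketch of the adjusted wrapper (assuming the similarity_search_with_score shown earlier):

def similarity_search_with_score(
        self, query: str, k: int = 4
):
    embedding = self.embedding_function(query)
    # unpack the extra keysentences return value introduced by the modification
    docs, keysentences = self.similarity_search_with_score_by_vector(embedding, k)
    return docs, keysentences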
- Prompt generation:
local_doc_qa.py
def get_knowledge_based_answer(self, query, vs_path, chat_history=[], streaming: bool = STREAMING):
    related_docs_with_score = vector_store.similarity_search_with_score(query, k=self.top_k)
    torch_gc()
    if len(related_docs_with_score) > 0:
        prompt = generate_prompt(related_docs_with_score, query)
    else:
        prompt = query
    answer_result_stream_result = self.llm_model_chain(
        {"prompt": prompt, "history": chat_history, "streaming": streaming})

def generate_prompt(related_docs: List[str],
                    query: str,
                    prompt_template: str = PROMPT_TEMPLATE, ) -> str:
    context = "\n".join([doc.page_content for doc in related_docs])
    prompt = prompt_template.replace("{question}", query).replace("{context}", context)
    return prompt
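A toy illustration of the substitution (the template string here is a hypothetical stand-in for the real PROMPT_TEMPLATE, which uses the same {context} and {question} placeholders):

# hypothetical template, not the project's actual PROMPT_TEMPLATE
template = "Known information:\n{context}\n\nAnswer the question based on the information above: {question}"
print(template.replace("{question}", "What is FAISS?")
              .replace("{context}", "FAISS is a library for efficient similarity search."))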
Number of dialogue turns
This is simply how many turns of history get kept; when it is 0, the current turn is answered without considering any previous Q&A.
- Used inside the model call, taking chatglm as an example:
chatglm_llm.py
def _generate_answer(self,
                     inputs: Dict[str, Any],
                     run_manager: Optional[CallbackManagerForChainRun] = None,
                     generate_with_callback: AnswerResultStream = None) -> None:
    history = inputs[self.history_key]
    streaming = inputs[self.streaming_key]
    prompt = inputs[self.prompt_key]
    print(f"__call:{prompt}")
    # Create the StoppingCriteriaList with the stopping strings
    stopping_criteria_list = transformers.StoppingCriteriaList()
    # define the model's stopping_criteria queue; on each response it syncs the
    # torch.LongTensor / torch.FloatTensor state into AnswerResult
    listenerQueue = AnswerResultQueueSentinelTokenListenerQueue()
    stopping_criteria_list.append(listenerQueue)
    if streaming:
        history += [[]]
        for inum, (stream_resp, _) in enumerate(self.checkPoint.model.stream_chat(
                self.checkPoint.tokenizer,
                prompt,
                # history comes back as [] when history_len is 0
                history=history[-self.history_len:-1] if self.history_len > 0 else [],
                max_length=self.max_token,
                temperature=self.temperature,
                top_p=self.top_p,
                top_k=self.top_k,
                stopping_criteria=stopping_criteria_list
        )):
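The history slice is easy to misread; a standalone demonstration (plain Python, no model required):

history = [["q1", "a1"], ["q2", "a2"], ["q3", "a3"]]
history += [[]]  # empty placeholder appended for the streaming turn
history_len = 2
# the placeholder occupies one slot of the slice, so history_len=2 keeps only one finished turn
print(history[-history_len:-1] if history_len > 0 else [])  # -> [['q3', 'a3']]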
Vector-match top k
Although this option sits on the model-configuration page, it actually controls the same thing as the chunk count k under context expansion; no idea why it is exposed twice.
Parameters controlling generation quality
These parameters are not exposed in the front end; they are hard-coded in the model definition.
- Model definition, taking chatglm as an example:
chatglm_llm.py
class ChatGLMLLMChain(BaseAnswer, Chain, ABC):
    max_token: int = 10000
    temperature: float = 0.01
    # nucleus sampling: cumulative probability mass of candidate tokens
    top_p = 0.4
    # number of candidate tokens
    top_k = 10
    checkPoint: LoaderCheckPoint = None
    # history = []
    history_len: int = 10
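For intuition about what top_k and top_p do during decoding, a minimal standalone sketch (illustrative only; not the actual ChatGLM sampling code):

import numpy as np

def filter_top_k_top_p(probs: np.ndarray, top_k: int, top_p: float) -> np.ndarray:
    # keep the top_k most likely tokens...
    order = np.argsort(probs)[::-1]
    keep = order[:top_k]
    # ...then the smallest prefix of them whose cumulative mass reaches top_p
    cum = np.cumsum(probs[keep])
    keep = keep[:np.searchsorted(cum, top_p) + 1]
    filtered = np.zeros_like(probs)
    filtered[keep] = probs[keep]
    return filtered / filtered.sum()  # renormalize into a distribution

probs = np.array([0.5, 0.2, 0.15, 0.1, 0.05])
print(filter_top_k_top_p(probs, top_k=10, top_p=0.4))  # only the 0.5 token survives top_p=0.4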
Notes on parameter tuning
- If score_threshold and k are too small, the source file containing the answer never gets found; too large, and a pile of irrelevant results comes back
- If chunk_size is too small, the original passage answering the question cannot be recovered from the source file; too large, and the model cannot distill an effective answer
- With the default temperature and top_p the generated answers are basically fixed, but also rather rigid; too high a temperature makes the stated facts unstable, and too high a top_p makes the language style unstable; adjusting top_k produced no noticeable change in the results