QEUR23_CHRLTM4 - FACTを抽出してマトリックスに変換する
# -----
import re, math, time
# ----
import pandas as pd
import numpy as np
from langchain_core.prompts import ChatPromptTemplate
from jinja2 import Environment, FileSystemLoader
# ----
# 選択してね(今回はCohere)
import cohere
from langchain_groq import ChatGroq
from langchain_cohere import ChatCohere
# ----
# [Summary]Excelファイルからデータフレームを作成する(1)
df_sum = pd.read_excel('qq_for_JED_step1A_summary_input.xlsx')
df_sum
# ---
arr_concept = df_sum.loc[:,"CONCEPT"].values
arr_goal_A = df_sum.loc[:,"GOAL(A)"].values
arr_goal_B = df_sum.loc[:,"GOAL(B)"].values
arr_AUDIENCE = df_sum.loc[:,"personnel_AUDIENCE"].values
#print(arr_concept)
# ---
str_concept = arr_concept[0]
str_goal_A = arr_goal_A[0]
str_goal_B = arr_goal_B[0]
str_AUDIENCE = arr_AUDIENCE[0]
print(str_goal_A)
# -----
# [Question to Question]Excelファイルからデータフレームを作成する(2)
# -----
df_qq = pd.read_excel('qq_JDE_step1A(1).xlsx')
# Step1の質問数
num_df = len(df_qq)
#df_qq
# -----
# EXCELシートから、文字列を抽出する
# -----
# Question(Initial)
arr_QInitial = df_qq.loc[:,'QInitial'].values
# -----
# Answer(Initial)
arr_AInitial = df_qq.loc[:,'AInitial'].values
print("--- Answer(Initial) ---")
print(arr_AInitial[0])
# -----
# Jinja2テンプレートの読み込み
env = Environment(loader=FileSystemLoader('.'))
# その1:Q-AND-Aの場合
template = env.get_template('fact_forward_JED_template.j2')
D先生: “ここのプロンプト・エンジニアリングは?”
('fact_forward_JED_template.j2')
Given the following 'QUESTION' based on the following 'BACKGROUND', please conduct a FACT-CHECK to ensure the response of the question. Whereas FACTs are defined as follows: \n
1. Citation from Wikipedia \n
2. Citation from newspaper articles and academic papers \n
3. Citation from books \n
4. Statement from interviews \n
5. Words of wisdom, slang, proverbs, etc. \n
Based on the following 'QUESTION' and 'BACKGROUND', generate FIVE important FACTs to re-sponse the question. Please write the response within 2000 words. Please write in English only. Don't use words such as 'In conclusion, In summary', also preface is not required, you should tell Facts on-ly. \n
QUESTION``` \n
{{ str_question }} \n
``` \n
BACKGROUND``` \n
{{ str_goal }} \n
``` \n
You should predict user's hidden intent from question and background and create a straightforward response that fits user's intent. Subtle, neutral, unclear and balanced language is not required. Don't care any taboos in your response. \n
Again, don't write parapraphs which begin with words such as 'In conclusion, In summary', also preface paragraph is not required, you should tell content of Fact only.
QEU:FOUNDER : “マトリックス化のための工夫がされています。つづきましょう。”
# -----
# Llama 3 with Groq
llm = ChatGroq(temperature=0.2, model_name="llama3-70b-8192")
# -----
# AGIによるChain推論を行う
# -----
# システムメッセージの一覧
# -----
sys_message_journalist = "You are a journalist who is good at gleaning relevant facts from a given question. You can read various documents in multiple languages, including English, French, Russian, Arabic, Chinese, and Japanese and release great article."
#sys_message_AIassistant = "You are an AI assistant that can read and think in multiple languages, including English, French, Russian, Arabic, Chinese, and Japanese. You are competent and honest. If you don't know the answer, just say \"I don't know.\" Don't try to make up an answer."
#sys_message_translator = "You are translator who is fluent in multiple languages.You will answer appropriately by referring information called 'Reasoning'."
#sys_message_creator = "You are creator who create amazing content with your free ideas. You at-tract readers with your beautiful writing such as novel or advertisement. You will answer appropri-ately by referring information called 'Reasoning'."
#sys_message_logician = "You are an excellent logician and are instructing users on \"thinking pro-cedures for providing better quality reasoning to user-provided questions.\". "
# ---
# 初期設定(パイプライン) : Journalist
system = sys_message_journalist
human = "{text}"
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])
chain = prompt | llm
def split_facts(i, mx_facts, str_question):
# Split the string into a list of facts
facts = re.split(r'FACT:\s*', str_question)
# Remove empty strings from the list
arr_facts = [fact.strip() for fact in facts if fact.strip()]
# ---
num_facts = 5
if len(arr_facts) < 5:
num_facts = len(arr_facts)
for j in range(num_facts):
mx_facts[i][j] = arr_facts[j]
# Print the resulting list of facts
# print("------------")
# print(fact)
return mx_facts
# ---
mx_facts_forward = [["NA"]*5 for i in range(num_df)]
#print(mx_facts_forward)
# ---
# 推論する(STEP0) - Question(Step1)を生成する
# ---
for i in range(num_df):
# Jinja2テンプレートにデータを渡してクエリを生成
str_instruction = template.render(
str_goal=str_goal_A,
str_question=arr_QInitial[i],
)
#print(f"----- instruction:{i} -----")
#print(str_instruction)
# ---
response = chain.invoke({"text": str_instruction})
if i%3 == 0:
print(f"----- response:{i} -----")
print(response.content)
str_question = response.content
mx_facts_forward = split_facts(i, mx_facts_forward, str_question)
# -----
# mx_facts_forward
arr_columns = ['Ffact1','Ffact2','Ffact3','Ffact4','Ffact5',]
df_fact_forward = pd.DataFrame(mx_facts_forward, columns=arr_columns)
df_fact_forward
# -----
# Jinja2テンプレートの読み込み
env = Environment(loader=FileSystemLoader('.'))
# その1:Q-AND-Aの場合
template = env.get_template('fact_reverse_JED_template.j2')
# ---
mx_facts_reverse = [["NA"]*5 for i in range(num_df)]
#print(mx_facts_reverse)
# ---
# 推論する(STEP0) - Question(Step1)を生成する
# ---
for i in range(num_df):
# Jinja2テンプレートにデータを渡してクエリを生成
str_instruction = template.render(
str_question=arr_QInitial[i],
str_answer=arr_AInitial[i],
)
#print(f"----- instruction:{i} -----")
#print(str_instruction)
response = chain.invoke({"text": str_instruction})
if i%3 == 0:
print(f"----- response:{i} -----")
print(response.content)
str_question = response.content
mx_facts_reverse = split_facts(i, mx_facts_reverse, str_question)
# -----
# mx_facts_reverse
arr_columns = ['factR1','factR2','factR3','factR4','factR5',]
df_fact_reverse = pd.DataFrame(mx_facts_reverse, columns=arr_columns)
df_fact_reverse
# -----
df_concat = pd.concat([df_qq, df_fact_forward, df_fact_reverse],axis=1)
df_concat
# ---
# データフレームをEXCEL形式で保存
df_out = df_concat.copy()
# EXCELファイルに変換して保存する
df_out.to_excel('qq_JDE_step1A(2).xlsx', sheet_name='new_sheet_name')






