import sys

import os 
import json 
import traceback 
import jmespath 
import numpy as np 

# exit()

def parse_dict_v1(xpath):
    """Parse the metadata encoded in a ``key=value,key=value`` file name.

    Only the base name of ``xpath`` is parsed; every ``.jsonl`` occurrence is
    removed from it and the remainder is split on ``,`` into ``key=value``
    segments.  A ``task`` segment is required and is further decomposed on
    ``_`` into ``name`` (first part), ``lang`` (last part once the split has
    been removed) and ``split`` (last part).

    Args:
        xpath: Path or bare file name, e.g.
            ``"dir/task=humaneval_python_test,r=assistant.jsonl"``.

    Returns:
        A dict holding ``path`` (the ``xpath`` argument as given), every
        ``key=value`` pair from the file name, plus the derived keys
        ``split``, ``lang`` (``None`` when the task has no language part),
        ``name`` and ``mt`` (``"baseline"`` unless the file name sets it).
        ``r`` and ``role`` mirror each other when only one is present.

    Raises:
        ValueError: If a comma-separated segment contains no ``=``.
        KeyError: If the file name has no ``task`` segment.
    """
    dict_info = {"path": xpath}

    stem = os.path.basename(xpath).replace(".jsonl", "")
    for dic_str in stem.split(","):
        parts = dic_str.split("=")
        if len(parts) < 2:
            raise ValueError(
                f"expected 'key=value' in file name, got {dic_str!r} "
                f"(path: {xpath!r})"
            )
        # Only the first two '='-separated fields are used, as before.
        k, v = parts[:2]
        dict_info[k] = v

    task = dict_info["task"]
    split = task.split("_")[-1]
    dict_info["split"] = split

    # Strip the split only as a trailing suffix.  A global str.replace would
    # also delete the same text inside the task name (e.g. the "_test" in
    # "mbpp_testgen_test") and corrupt the derived name/lang.
    suffix = "_" + split
    if task.endswith(suffix):
        task = task[: -len(suffix)]

    dict_info["lang"] = task.split("_")[-1] if "_" in task else None
    dict_info["name"] = task.split("_")[0]

    if "mt" not in dict_info:
        dict_info["mt"] = "baseline"
    if dict_info["name"] == "apps":
        dict_info["lang"] = "python"

    # Special case: this task name carries neither a language nor a split.
    if dict_info["task"] == "archive_stackexchange":
        dict_info["split"] = "test"
        dict_info["lang"] = None
        dict_info["name"] = dict_info["task"]

    # Keep the "r" and "role" aliases in sync when only one was supplied.
    if "r" not in dict_info and "role" in dict_info:
        dict_info["r"] = dict_info["role"]
    if "r" in dict_info and "role" not in dict_info:
        dict_info["role"] = dict_info["r"]

    return dict_info




if __name__ == "__main__":
    # Usage: python <this script> <folder>/<key=value,...>.jsonl
    #
    # Reads one JSONL file, keeps each record's "sql_id" and the field named
    # by the role encoded in the file name, and appends the reduced records
    # to ./all_request_<folder>.jsonl, where <folder> is the input file's
    # parent directory name.
    p = sys.argv[-1]

    # Explicit checks rather than assert: assertions are removed under -O.
    if not os.path.isfile(p):
        sys.exit(f"not a file: {p}")
    if ".jsonl" not in p:
        sys.exit(f"expected a .jsonl file: {p}")

    p_base = os.path.basename(p)
    meta = parse_dict_v1(xpath=p_base)
    if "r" not in meta:
        sys.exit(f"file name carries neither 'r=' nor 'role=': {p_base}")
    role = meta["r"]

    with open(p) as f:
        # Skip blank lines (e.g. a trailing empty line) instead of crashing
        # in json.loads.
        data = [json.loads(line) for line in f if line.strip()]

    xdata = [
        {"idx": x["sql_id"], "xpath": p_base, "content": x[role]}
        for x in data
    ]

    folder_name = os.path.basename(os.path.dirname(p))
    allowed_folders = (
        "CodeLlama-34b-Instruct-hf_extract",
        "WizardCoder-Python-34B-V1.0_extract",
        "WizardCoder-15B-V1.0_extract",
        "extract_rm_languagemodel_baseline_q",
    )
    if folder_name not in allowed_folders:
        sys.exit(f"unexpected parent folder {folder_name!r} for {p}")

    with open(f"./all_request_{folder_name}.jsonl", "a") as f:
        # One JSON document per line.  Nothing is written for an empty
        # input, so the aggregate file never gains a stray blank line.
        f.writelines(json.dumps(x) + "\n" for x in xdata)


