import os 
import json 
from glob2 import glob 

import shutil 

from tqdm import tqdm 

if __name__ == "__main__":

    # Input tree: every entry matching "*_out" under root_dir_path is processed.
    root_dir_path = "/data3/icse_dataset/llm_save_data"
    out_dir_pattern = os.path.join(root_dir_path, "*_out")
    dir_path_list = glob(out_dir_pattern)

    for dir_path in tqdm(dir_path_list):

        # The output directory is the input path with every occurrence of
        # "llm_save_data" replaced by "llm_save_data_new2"; it is created on
        # demand and reused if it already exists.
        dir_path_new = dir_path.replace("llm_save_data", "llm_save_data_new2")
        os.makedirs(dir_path_new, exist_ok=True)

        # JSONL files of the current input directory.
        jsonl_pattern = os.path.join(dir_path, "*.jsonl")
        fl = glob(jsonl_pattern)

        def parse_dict_v1(xpath):
            """Parse a ``k1=v1,k2=v2`` string into a dict of string keys/values.

            The string is split on ``,`` into segments and each segment is
            split on its *first* ``=`` only, so a value that itself contains
            ``=`` is kept whole instead of being truncated. When a key appears
            more than once, the last occurrence wins.

            Args:
                xpath: Comma-separated ``key=value`` pairs.

            Returns:
                dict mapping each key to its (string) value.

            Raises:
                ValueError: if a segment contains no ``=``.
            """
            dict_info = {}
            for dic_str in xpath.split(","):
                key, sep, value = dic_str.partition("=")
                if not sep:
                    # Fail loudly with the offending segment rather than with
                    # an opaque tuple-unpacking error.
                    raise ValueError(
                        f"expected 'key=value' but got {dic_str!r} in {xpath!r}"
                    )
                dict_info[key] = value
            return dict_info

        def dict_to_str(xpath_info):
            """Render a mapping as ``key=value`` pairs joined by commas.

            Pairs are emitted in descending key order, so the same mapping
            always produces the same string regardless of insertion order.
            Values are formatted with their default string formatting.
            """
            pieces = []
            for key in sorted(xpath_info, reverse=True):
                pieces.append(f"{key}={xpath_info[key]}")
            return ",".join(pieces)

        def parse_none(xpath):
            """Redistribute the records of a JSONL file into per-task files.

            Every line of ``xpath`` is parsed as JSON. For each record, its
            ``"task"`` dict is updated in place with ``"q"`` set to the
            record's ``"queue_name"`` and, when absent, ``"r"`` set to
            ``"chatgpt_answer"``. The updated task is turned into a file name
            with ``dict_to_str`` and the whole record (including the updated
            task) is appended to ``<dir_path_new>/<name>.jsonl``.

            Records are grouped per target file and written with one open per
            file. If a target file already exists and does not end with a
            newline, a newline is written first so the first appended record
            is not glued onto the file's last line.

            Args:
                xpath: Path of the JSONL file to redistribute.

            Raises:
                KeyError: if a record lacks ``"task"`` or ``"queue_name"``.
                json.JSONDecodeError: if a line is not valid JSON.
            """
            with open(xpath) as f:
                data = [json.loads(x) for x in f.readlines()]

            # target path -> serialized records, in input order
            rows_by_path = {}
            for line in tqdm(data):
                task = line["task"]
                task.update({"q": line["queue_name"]})
                if "r" not in task:
                    task["r"] = "chatgpt_answer"
                new_path = dict_to_str(xpath_info=task)
                new_path = os.path.join(dir_path_new, new_path + ".jsonl")
                rows_by_path.setdefault(new_path, []).append(json.dumps(line))

            for new_path, rows in rows_by_path.items():
                # Detect an existing file whose last byte is not a newline.
                needs_newline = False
                if os.path.isfile(new_path) and os.path.getsize(new_path) > 0:
                    with open(new_path, "rb") as f:
                        f.seek(-1, os.SEEK_END)
                        needs_newline = f.read(1) != b"\n"

                with open(new_path, "a") as f:
                    if needs_newline:
                        f.write("\n")
                    f.writelines(row + "\n" for row in rows)


        def parse_json(xpath):
            """Derive the normalized output file name for an input JSONL file.

            Only the base name of ``xpath`` is used. Two name layouts are
            handled:

            * ``key=value,{...}`` -- the text between the last ``{`` and the
              following ``}`` is parsed as a JSON object after replacing
              single quotes with double quotes; the leading ``key=value``
              pair is then added (overriding an equal key from the object).
            * anything else -- parsed by ``parse_dict_v1`` as comma-separated
              ``key=value`` pairs.

            Afterwards a ``"role"`` entry is folded into ``"r"`` (the two must
            agree when both are present) and ``"r"`` defaults to
            ``"chatgpt_answer"``. The result is rendered by ``dict_to_str``.

            Args:
                xpath: Path (or bare name) of the input file.

            Returns:
                The normalized file name ending in ``.jsonl``, or ``None``
                when the base name is exactly ``None.jsonl``.

            Raises:
                ValueError: if the leading part of a ``{...}`` name has no
                    ``=``, or if ``"r"`` and ``"role"`` are both present but
                    differ.
                json.JSONDecodeError: if the embedded object is not valid
                    JSON after quote replacement.
            """
            xpath = os.path.basename(xpath)
            if xpath == "None.jsonl":
                return None

            # Strip only the trailing extension; a ".jsonl" occurring inside
            # the name is part of a value and must be kept.
            suffix = ".jsonl"
            if xpath.endswith(suffix):
                xpath = xpath[: -len(suffix)]

            xpath_info = {}

            if "{" in xpath and "}" in xpath:
                left_part = xpath.split(",{", 1)[0]
                # Split on the first "=" only so the value is kept whole.
                left_part_k, sep, left_part_v = left_part.partition("=")
                if not sep:
                    raise ValueError(
                        f"expected 'key=value' before the dict part, got {left_part!r}"
                    )

                # Text between the last "{" and the first "}" after it.
                inner = xpath.rsplit("{", 1)[-1].split("}", 1)[0]
                # The name embeds a single-quoted dict; make it valid JSON.
                as_json = ("{" + inner + "}").replace("'", '"')
                xpath_info.update(json.loads(as_json))
                xpath_info[left_part_k] = left_part_v
            else:
                xpath_info = parse_dict_v1(xpath)

            if "role" in xpath_info:
                role = xpath_info.pop("role")
                if "r" not in xpath_info:
                    xpath_info["r"] = role
                elif xpath_info["r"] != role:
                    # Explicit raise: an assert would be skipped under -O and
                    # the conflicting role silently dropped.
                    raise ValueError(
                        f"conflicting 'r'={xpath_info['r']!r} and 'role'={role!r}"
                    )
            elif "r" not in xpath_info:
                xpath_info["r"] = "chatgpt_answer"

            return dict_to_str(xpath_info=xpath_info) + ".jsonl"

        def get_filesize(xpath):
            """Return the number of lines in the text file at ``xpath``.

            Despite the name, this is a line count, not a byte size.
            """
            line_count = 0
            with open(xpath) as handle:
                for _ in handle:
                    line_count += 1
            return line_count


        # Rename pass: copy every input file to its normalized name in
        # dir_path_new, merging files whose normalized names collide.
        # parse_json() returns None for "None.jsonl"; that file is remembered
        # here and redistributed record by record after the loop.
        none_path = None
        for old_path in tqdm(fl):
            new_path = parse_json(old_path)
            if new_path is None:
                none_path = old_path
                continue
            new_path = os.path.join(dir_path_new, new_path)

            if not os.path.isfile(new_path):
                # First input mapping to this normalized name: plain copy.
                shutil.copy(old_path, new_path)
                continue

            # Another input already produced this target: merge both files.
            old_size = get_filesize(old_path)
            new_size_before = get_filesize(new_path)

            with open(old_path, "r") as f:
                old_data = [json.loads(x) for x in f.readlines()]
            with open(new_path, "r") as f:
                new_data = [json.loads(x) for x in f.readlines()]

            print("old", len(old_data), "new", len(new_data))

            # Serialize before truncating the target so a serialization error
            # cannot leave it empty. Every record is newline-terminated so
            # that later appends (see parse_none) start on a fresh line.
            merged_rows = [json.dumps(x) + "\n" for x in old_data + new_data]
            with open(new_path, "w") as f:
                f.writelines(merged_rows)

            new_size = get_filesize(new_path)
            print(
                "exist:", new_path,
                "add.size", old_size,
                "new_size_before", new_size_before,
                "new.size", new_size,
            )

        if none_path is not None:
            parse_none(xpath=none_path)

