import pandas as pd 
import os 
from itertools import product 

from glob2 import glob 

def dict_mean(dict_list):
    """Return the key-wise arithmetic mean of a list of dicts.

    The key set (and the key order of the result) is taken from the first
    dict; every dict in ``dict_list`` must provide all of those keys. Keys
    that only appear in later dicts are ignored.

    Raises:
        IndexError: if ``dict_list`` is empty.
        KeyError: if a later dict lacks a key of the first dict.
    """
    reference = dict_list[0]
    count = len(dict_list)
    return {
        key: sum(entry[key] for entry in dict_list) / count
        for key in reference
    }

def load_df_list(xpath_list):
    """Aggregate answer statistics over a list of result CSV files.

    For every path in ``xpath_list`` two files are read:

    * the CSV itself, from which per-file column means and the raw
      ``*_extract_c`` values are taken, and
    * a companion file obtained by replacing ``".csv"`` with ``".txt"`` in
      the path. It is parsed with ``pd.read_csv`` and must provide the
      columns ``human_not_none``, ``chatgpt_not_none`` and ``raw_size``.

    Aggregation rules:

    * the per-file column means are averaged across files with
      ``dict_mean`` (each file has equal weight, regardless of row count);
    * ``extract_h_c`` / ``extract_c_c`` are means over the rows of all
      files pooled together;
    * ``human_not_none`` / ``chatgpt_not_none`` are the summed counts of
      all companion files divided by the summed ``raw_size``.

    Args:
        xpath_list: non-empty list of CSV file paths.

    Returns:
        dict mapping statistic name to its aggregated value.

    Side effects:
        Prints the shape and columns of every CSV, and finally the summed
        companion-file totals together with the resulting dict.
    """

    def load_one_df(one_path):
        # Per-file means plus the raw extract_c columns of one CSV.
        df = pd.read_csv(one_path)
        print(df.shape, df.columns)

        df["chatgpt_answer_is_delete"] = pd.to_numeric(df["chatgpt_answer_is_delete"])

        ret = {
            "chatgpt_answer_ast_pass_all": df["chatgpt_answer_ast_pass_all"].mean(),
            "chatgpt_answer_ast_pass_main": df["chatgpt_answer_ast_pass_main"].mean(),
            "chatgpt_answer_error": df["chatgpt_answer_error"].mean(),
            # Reported under a different name than the source column.
            "chatgpt_answer_as_language": df["chatgpt_answer_is_delete"].mean(),

            "human_answer_ast_pass_all": df["human_answer_ast_pass_all"].mean(),
            "human_answer_ast_pass_main": df["human_answer_ast_pass_main"].mean(),
            "human_answer_error": df["human_answer_error"].mean(),
        }

        extract_c_chat = df["chatgpt_answer_extract_c"].tolist()
        extract_c_human = df["human_answer_extract_c"].tolist()

        return ret, extract_c_chat, extract_c_human

    def load_second_df(one_path):
        # Companion counts file. NOTE: str.replace rewrites every ".csv"
        # occurrence in the path, not only the trailing extension.
        one_path = one_path.replace(".csv", ".txt")
        return pd.read_csv(one_path)

    df_list = []
    extract_c_chat_list = []
    extract_c_human_list = []
    df2_list = []

    for one_path in xpath_list:
        # Each CSV is parsed exactly once (the original loaded it twice and
        # threw the first result away).
        ret, extract_c_chat, extract_c_human = load_one_df(one_path=one_path)
        df_list.append(ret)
        extract_c_chat_list.extend(extract_c_chat)
        extract_c_human_list.extend(extract_c_human)

        df2_list.append(load_second_df(one_path=one_path))

    info_meta = dict_mean(dict_list=df_list)

    info_meta.update({
        "extract_h_c": sum(extract_c_human_list) / len(extract_c_human_list),
        "extract_c_c": sum(extract_c_chat_list) / len(extract_c_chat_list),
    })

    # Column-wise totals over all companion files.
    df2 = pd.concat(df2_list, ignore_index=True)
    df2 = df2.sum(axis=0)
    df2 = df2.to_dict()

    info_meta.update({
        "human_not_none": df2["human_not_none"] / float(df2["raw_size"]),
        "chatgpt_not_none": df2["chatgpt_not_none"] / float(df2["raw_size"]),
    })
    print(df2, "df2", info_meta)

    return info_meta


def load_sta(root_dir , fk1=None):
    """Collect aggregated statistics for all CSV files of one task key.

    Files are searched directly inside ``root_dir`` with the glob pattern
    ``*task=*{fk1}*.csv``. ``fk1`` is interpolated into the pattern as-is,
    so the default ``None`` produces the literal text ``None`` in it.

    Args:
        root_dir: directory that is searched for matching CSV files.
        fk1: task key that must occur in the file name after ``task=``.

    Returns:
        The dict produced by ``load_df_list`` for the matching files, or
        ``None`` when no file matches the pattern.
    """
    pattern = os.path.join(root_dir, f"*task=*{fk1}*.csv")
    matches = glob(pattern)

    # Nothing to aggregate for this task key.
    if not matches:
        return None

    return load_df_list(xpath_list=matches)
        
        
if __name__ == "__main__":
    # Build "<task>_<language>_<split>" keys for the two translation tasks.
    # NOTE(review): "go" is listed twice, so the *_all lists contain
    # duplicated entries - confirm whether another language was intended.
    all_languages = ["go", "java", "javascript", "php", "python", "go"]
    python_only = ["python"]
    splits = ["test", "valid"]

    code2doc_list_all = ["_".join(parts) for parts in product(["code2doc"], all_languages, splits)]
    code2doc_list = ["_".join(parts) for parts in product(["code2doc"], python_only, splits)]

    doc2code_list_all = ["_".join(parts) for parts in product(["doc2code"], all_languages, splits)]
    doc2code_list = ["_".join(parts) for parts in product(["doc2code"], python_only, splits)]

    # Alternative input directory: "/data3/fse2023_dataset/NL-CCD/extract_sta"
    root_dir = "/data3/fse2023_dataset/NL-CCD/codellama2_extract_sta"

    # Report row name -> (list of task keys, unused placeholder).
    # NOTE(review): "concde_dev" may be a misspelling of "concode_dev";
    # verify against the file names on disk before changing it.
    dic_nlccd = {
        "QA": (["archive_stackexchange"], None),
        "code2doc": (code2doc_list, None),
        "code2doc_all": (code2doc_list_all, None),
        "doc2code": (doc2code_list, None),
        "doc2code_all": (doc2code_list_all, None),
        "concode": (["concde_dev"], None),
        "apps": (["apps_test"], None),
    }

    report_list = []
    for name, (task_keys, _) in dic_nlccd.items():
        # NOTE(review): only the first key of each list is looked up, so the
        # remaining entries (e.g. of the *_all lists) are never used.
        info_meta = load_sta(root_dir=root_dir, fk1=task_keys[0])
        if info_meta is None:
            # No matching files for this entry: leave it out of the report.
            continue
        info_meta["name"] = name
        report_list.append(info_meta)

    # One row per entry that had matching files.
    df = pd.DataFrame(report_list)
    print(df.shape, root_dir)
    df.to_csv("/tmp/report.csv", index=False)
        