import ast
import astor

 
import json 
import os 
from concurrent.futures import ThreadPoolExecutor
import concurrent 

import traceback 

import logging 
# Module-level logger. It is named after this file's path (__file__), so the
# %(name)s field of every record shows the script path.
logger = logging.getLogger(__file__)
# Setup logging
# Configures the root logger at import time: INFO level, timestamped records.
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                    datefmt='%m/%d/%Y %H:%M:%S',
                    level=logging.INFO)

import sys 
# NOTE(review): hard-coded absolute path, presumably a local CodeGen checkout
# that provides the `codegen_sources` package imported on the next line.
# This only resolves on the original author's machine -- confirm / parametrize.
sys.path.append("/home/wj_cuda113/wj_code/dl_chatgpt/CodeGen")
import codegen_sources.preprocessing.lang_processors as lp


def ast_chk(content_list):
    """Check whether each item of ``content_list`` parses as Python source.

    Args:
        content_list: Iterable of source strings.  Items that ``ast.parse``
            cannot handle at all (for example ``None``) are reported as
            failures instead of raising.

    Returns:
        A list of ``(code, ok, err)`` tuples in input order.  ``ok`` is
        ``True`` when ``ast.parse`` accepted the item; ``err`` is ``None`` on
        success and the formatted traceback string on failure.
    """
    def _ast(content):
        try:
            ast.parse(source=content)
        except Exception:
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate to the caller.
            return False, traceback.format_exc()
        return True, None

    return [(code, *_ast(content=code)) for code in content_list]


def load_jsonl(p):
    """Load a JSON Lines file into a list of decoded records.

    Args:
        p: Path of the JSONL file to read.

    Returns:
        A list holding one decoded JSON value per non-blank line, in file
        order.  An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8 by specification, so do not rely on the locale default.
    with open(p, encoding="utf-8") as f:
        # Stream the file line by line and skip blank lines (for example a
        # trailing empty line), which json.loads would otherwise reject.
        return [json.loads(line) for line in f if line.strip()]
    


if __name__ == "__main__":
    # Command-line entry point.
    #
    # Reads a JSONL file (-path), and for every role in
    # ("chatgpt_answer", "human_answer") passes the code stored under that key
    # of each record through ``processor.obfuscate_code``.  One output JSONL
    # file per (role, transformation name) pair is written to
    #   <save_dir>/<transformation>/r=<role>,mt=<transformation>,<input basename>
    #
    # Every output record is a copy of the input record in which
    #   <role>        holds the transformed code (None if the transform failed),
    #   <role>_raw    holds the original code,
    #   <role>_flags  holds {"raw_pass": bool, "mt_pass": bool} from ast_chk().
    # A record that cannot be processed at all (e.g. a missing key) is written
    # as JSON ``null`` so that line i of the output matches line i of the input.
    cur_dir = os.path.dirname(os.path.dirname(__file__))

    # Make the parent directory of this script's directory importable.
    sys.path.append(cur_dir)
    print(sys.path, cur_dir)

    import argparse
    import copy
    import itertools
    import random

    import numpy as np
    from tqdm import tqdm

    parser = argparse.ArgumentParser(
        description="Obfuscate the code answers stored in a JSONL dataset.")
    parser.add_argument("-path", type=str, required=True,
                        help="input JSONL file, one JSON record per line")
    parser.add_argument("-save_dir", type=str, required=True,
                        help="directory under which the output files are written")

    args = parser.parse_args()
    print(args)

    root_dir_replace = args.save_dir
    json_p = args.path

    assert os.path.isfile(json_p), json_p

    os.makedirs(root_dir_replace, exist_ok=True)

    # Leave one core free, but never drop below one worker: os.cpu_count() may
    # return None and is 1 on single-core hosts, and ThreadPoolExecutor raises
    # ValueError for max_workers <= 0.
    num_workers = max(1, (os.cpu_count() or 1) - 1)

    vides = load_jsonl(json_p)

    # NOTE(review): this guard checks <save_dir>/<input basename>, but the
    # files written below live in a sub-directory and carry an
    # "r=...,mt=...," prefix, so it does not protect earlier output from being
    # overwritten -- confirm the intended behaviour before tightening it.
    new_save_path = os.path.join(root_dir_replace, os.path.basename(json_p))
    assert not os.path.isfile(new_save_path), new_save_path

    def process_file(processor, i, role="chatgpt_answer"):
        """Process record ``i``; on any error log the traceback and return None."""
        try:
            return _process_file(processor=processor, i=i, role=role)
        except Exception:
            # Best effort: one bad record must not abort the whole run.
            logger.info(traceback.format_exc())
            return None

    def _process_file(processor, i, role="chatgpt_answer"):
        """Build the output record for ``vides[i]``.

        Args:
            processor: Object exposing ``obfuscate_code(code)``; the first
                item of its return value is taken as the transformed code.
            i: Index of the record in ``vides``.
            role: Key of the record whose code is transformed.

        Returns:
            A deep copy of the record with ``role``, ``<role>_raw`` and
            ``<role>_flags`` set as described at the top of this block.

        Raises:
            KeyError: If the record lacks ``"id"`` or ``role``.
        """
        data = vides[i]
        new_info = copy.deepcopy(data)

        idx = data["id"]
        code = data[role]

        try:
            code_mt, _ = processor.obfuscate_code(code)
        except Exception:
            # Keep the record; a failed transform is stored as None and shows
            # up as mt_pass == False below.
            logger.info("idx:{} role:{} err_msg:{}".format(
                idx, role, traceback.format_exc()))
            code_mt = None

        raw_chk, mt_chk = ast_chk(content_list=[code, code_mt])

        new_info[role] = code_mt
        new_info[f"{role}_raw"] = code
        new_info[f"{role}_flags"] = {
            "raw_pass": raw_chk[1],
            "mt_pass": mt_chk[1],
        }
        return new_info

    MT_OPERATE_RENAME_LIST = []

    MT_OPERATE_EXPR_LIST = [
        "fb_obfuscator",
    ]

    print("start workers")
    for role in ["chatgpt_answer", "human_answer"]:

        for x_name in MT_OPERATE_RENAME_LIST + MT_OPERATE_EXPR_LIST:

            # Results are stored by input index so the output keeps the input
            # order even though the futures complete in arbitrary order.
            predictions = [None] * len(vides)

            with tqdm(total=len(vides)) as pbar:

                with ThreadPoolExecutor(max_workers=num_workers) as executor:

                    # One processor instance is shared by all tasks of this pass.
                    x_processor = lp.PythonTreeSitterProcessor()

                    futures = {
                        executor.submit(process_file, x_processor, i, role): i
                        for i in range(len(vides))
                    }
                    for future in concurrent.futures.as_completed(futures):
                        predictions[futures[future]] = future.result()
                        pbar.update(1)

            x_basename = "r={},mt={},".format(role, x_name) + os.path.basename(json_p)

            new_save_path = os.path.join(root_dir_replace, x_name, x_basename)
            os.makedirs(os.path.dirname(new_save_path), exist_ok=True)
            with open(new_save_path, "w") as fff:
                fff.write("\n".join(json.dumps(x) for x in predictions))
            #


        # x_name = "rename_var"
        # x_processor = mt_producsor.MutateModel(rate=0.8, op_names = [x_name] )
        # futures = process_file( x_processor ,  10)
        # print ( vides[10])
        # print (futures)
