import json 


def template_system(lang):
    """Return the fixed system message that opens every prompt.

    Args:
        lang: Accepted for call-site compatibility; the message text does
            not depend on it.

    Returns:
        A chat-message dict with the keys ``role``, ``content`` and ``name``.
    """
    instructions = (
        "Please complete the python code generation for the following "
        "question, there may have some example test cases, and "
        "you can use them to evaluate the generated code. "
        "Do not provide any explanations, comments, test cases or "
        "additional text, only output the completed python code in "
        "a markdown style and nothing else."
    )
    message = {"role": "system"}
    message["content"] = instructions
    message["name"] = "system"
    return message

def template_user_testcase(item):
    """Build the user message text from a problem's prompt and test code.

    Args:
        item: Mapping that provides string values under ``"prompt"`` and
            ``"test"``.

    Returns:
        The prompt, a blank line, the ``The testcases: `` label, and then
        the test code, as a single string.
    """
    question = item["prompt"]
    label = "\n\nThe testcases: "
    return question + label + item["test"]

def template_user(item):
    """Wrap a problem in a user chat message.

    Args:
        item: Problem mapping, passed unchanged to
            ``template_user_testcase`` to produce the message text.

    Returns:
        A chat-message dict with the keys ``role``, ``content`` and ``name``.
    """
    message = {"role": "user"}
    message["content"] = template_user_testcase(item)
    message["name"] = "user"
    return message



def build_zeroshot(lang, task_id, item):
    """Assemble one zero-shot prompt record for a single problem.

    Args:
        lang: Passed through to ``template_system``.
        task_id: Identifier stored under ``task_id`` and ``idx`` and
            embedded in ``sql_id``.
        item: Problem mapping; ``"test"`` must be a plain ``str`` and
            ``"canonical_solution"`` is either a single value or a list
            whose first element is used.

    Returns:
        A dict with the keys ``input``, ``task_id``, ``human_answer``,
        ``idx``, ``ideal`` and ``sql_id``.

    Raises:
        AssertionError: If ``item["test"]`` is not exactly of type ``str``.
    """
    test_type = type(item["test"])
    assert test_type == str, test_type

    messages = [template_system(lang), template_user(item)]

    # A list of reference solutions is reduced to its first entry.
    solution = item["canonical_solution"]
    if type(solution) == list:
        solution = solution[0]

    return {
        "input": messages,
        "task_id": task_id,
        "human_answer": solution,
        "idx": task_id,
        "ideal": solution,
        "sql_id": f"text-code/{task_id}",
    }
    
# from datasets import load_dataset
# dataset = load_dataset("codeparrot/apps")
    
    

    
# from datasets import load_dataset
# dataset = load_dataset("codeparrot/apps")
import gzip
from io import BytesIO as StringIO  # NOTE: despite the alias, this is a bytes buffer.

import requests

OPENAI_HUMANEVAL_URL = "https://github.com/openai/human-eval/raw/master/data/HumanEval.jsonl.gz"
# The downloaded file is expected to contain exactly this many records.
EXPECTED_HUMANEVAL_RECORDS = 164
# Seconds to wait on the server before giving up instead of hanging forever.
DOWNLOAD_TIMEOUT_SECONDS = 60

# Download the gzipped JSONL archive. Failing fast on an HTTP error avoids
# handing an error page to the gzip decoder, which would fail obscurely.
response = requests.get(OPENAI_HUMANEVAL_URL, timeout=DOWNLOAD_TIMEOUT_SECONDS)
response.raise_for_status()

# Decode one JSON object per non-blank line, preserving file order.
dt_test = []
with gzip.GzipFile(fileobj=StringIO(response.content), mode="rb") as fp:
    for raw_line in fp:
        line = raw_line.decode("utf-8")
        if line.strip():
            dt_test.append(json.loads(line))

# Explicit check (not an assert) so it still runs under ``python -O``.
if len(dt_test) != EXPECTED_HUMANEVAL_RECORDS:
    raise ValueError(
        "expected {} HumanEval records, got {}".format(
            EXPECTED_HUMANEVAL_RECORDS, len(dt_test)
        )
    )


for str_role in ("train",):
    # One JSON-encoded prompt record per loaded problem, in dataset order.
    app_list = [
        json.dumps(
            build_zeroshot(
                lang="python",
                task_id="humaneval_001/default/{}.input".format(item["task_id"]),
                item=item,
            )
        )
        for item in dt_test
    ]
    payload = "\n".join(app_list)

    # The same newline-joined payload is written to both output locations.
    for path_template in (
        "/data3/icse_dataset/raw_data/humaneval_{}.jsonl",
        "/data3/icse_dataset/wj_build_prompt_data/humaneval_{}.jsonl",
    ):
        with open(path_template.format(str_role), "w") as out_file:
            out_file.write(payload)
        
