import json 
from tqdm import tqdm 

def template_system(lang):
    """Build the system message that frames the code-generation task.

    Args:
        lang: Currently ignored; the prompt text always targets Java. The
            parameter is kept so existing callers (which pass ``None``)
            continue to work.

    Returns:
        A chat-message dict with ``role``, ``content`` and ``name`` keys.
    """
    # Adjacent string literals are concatenated verbatim, so each fragment
    # must carry its own trailing space; without it the prompt reads
    # "in Java,given ..." and "in class.Please ...".
    content = (
        "Generate source code of class member functions in Java, "
        "given natural language description and class environment. "
        "Class environment is the programmatic context provided by the rest of the class, "
        "including other member variables and member functions in class. "
        "Please only reply with a code block and avoid providing any explanations, "
        "comments, imports, or additional text. "
    )
    return {
        "role": "system",
        "content": content,
        "name": "system",
    }



def build_template_user(content):
    """Build the user message from one raw ``nl`` field.

    The text is split at the first ``concode_field_sep`` marker: the part
    before it is used as the natural-language description and the part after
    it as the class environment. Surrounding whitespace is kept as-is.

    Args:
        content: Raw description string, optionally containing the
            ``concode_field_sep`` marker.

    Returns:
        A chat-message dict with ``role``, ``content`` and ``name`` keys.
    """
    # partition() splits at the first marker; when the marker is missing it
    # returns (content, "", ""), so the whole text is kept as the description
    # instead of being silently discarded.
    nl_desc, _, nl_class = content.partition("concode_field_sep")

    return {
        "role": "user",
        "content": "The nature language description is: {}; The member variables and member functions is {}".format(nl_desc, nl_class),
        "name": "user",
    }

def read_from_url(role="dev"):
    """Download one split of the CONCODE dataset and parse it as JSON lines.

    Args:
        role: Split name substituted into the file name of the download URL
            (this script uses "test", "train" and "dev").

    Returns:
        A list with one decoded JSON value per non-blank line of the file.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    import requests

    url = "https://raw.githubusercontent.com/microsoft/CodeXGLUE/main/Text-Code/text-to-code/dataset/concode/{}.json".format(role)
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, timeout=60)
    # Fail here on 4xx/5xx instead of handing an error page to json.loads.
    response.raise_for_status()
    raw_code = response.content.decode("utf-8")
    print("raw_code.len", len(raw_code))

    data_list = []
    # Split on "\n" only: str.splitlines() would also break on other line
    # separators that may legally appear inside a JSON string.
    for line in raw_code.split("\n"):
        # Skip empty and whitespace-only lines (trailing newline, stray "\r").
        if not line.strip():
            continue
        data_list.append(json.loads(line))

    return data_list

def build_zeroshot(ideal="", user_content="", task_id=""):
    """Assemble one zero-shot sample: the prompt messages plus the reference.

    Args:
        ideal: Reference answer stored under the ``ideal`` key.
        user_content: Raw text passed to ``build_template_user``.
        task_id: Identifier stored under both ``task_id`` and ``idx``.

    Returns:
        A dict with the keys ``input`` (system message followed by user
        message), ``task_id``, ``idx`` and ``ideal``, in that order.
    """
    messages = [template_system(None), build_template_user(user_content)]
    sample = dict(input=messages, task_id=task_id, idx=task_id, ideal=ideal)
    return sample
    

# Convert each downloaded split into a JSONL file of zero-shot samples.
for split_name in tqdm(["test", "train", "dev"]):
    records = read_from_url(role=split_name)

    # One JSON document per record; the task id is the record's position
    # within its split. Each record is expected to carry "code" and "nl" keys.
    serialized = [
        json.dumps(
            build_zeroshot(
                ideal=record["code"],
                user_content=record["nl"],
                task_id="concode_{}".format(position),
            )
        )
        for position, record in enumerate(records)
    ]

    # NOTE(review): "concde" in the file name looks like a typo for "concode";
    # left unchanged because other tooling may read this exact path.
    out_path = "/data3/icse_dataset/raw_data/concde_{}.jsonl".format(split_name)
    with open(out_path, "w") as out_file:
        out_file.write("\n".join(serialized))
    
