import re 
import ast 

from io import StringIO
import  tokenize

# Matches a C-style block comment (`/* ... */`) non-greedily and across
# newlines; group 1 is the text between the delimiters.
comment_regex = re.compile(r"/\*([\s\S]*?)\*/")

# Matches a triple-backtick fenced block whose opening fence may be followed
# by a language-tag line (`\w+` then a newline); group 1 is the text up to the
# next closing fence.
code_regex3  = re.compile( r"```(?:\w+\n)?([\s\S]*?)```" )

# Same opening as `code_regex3` but without a closing fence.
# NOTE(review): the lazy group is the last element of the pattern, so it
# always captures the empty string -- confirm whether this is still needed.
code_regex_fail  = re.compile( r"```(?:\w+\n)?([\s\S]*?)" )



# One alternation covering C-style line comments, block comments, and single-
# or double-quoted string literals.  String literals are matched only so that
# comment markers inside them are consumed as part of the string and survive.
_C_STYLE_COMMENT_OR_STRING = re.compile(
    r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
    re.DOTALL | re.MULTILINE
)


def _drop_blank_lines(text):
    """Return ``text`` without its empty or whitespace-only lines."""
    return '\n'.join(line for line in text.split('\n') if line.strip() != "")


def _blank_out_comment(match):
    """Replace a matched comment with one space; return string literals unchanged."""
    matched = match.group(0)
    # Only the comment alternatives start with '/'; a space (not an empty
    # string) is substituted so the tokens on either side stay separated.
    return " " if matched.startswith('/') else matched


def _strip_python_comments_and_docstrings(source):
    """Re-emit Python ``source`` token by token, omitting comments and docstrings.

    Column gaps between tokens on a line are rebuilt as spaces, so the layout
    of the remaining code is kept.

    NOTE(review): a STRING token is dropped when it directly follows an INDENT
    or NEWLINE token, and also whenever it starts at column 0.  The last rule
    also drops a string literal placed at column 0 on a continuation line.
    """
    pieces = []
    # Starting as INDENT makes a leading module docstring count as a docstring.
    prev_toktype = tokenize.INDENT
    last_lineno = -1
    last_col = 0
    for tok in tokenize.generate_tokens(StringIO(source).readline):
        token_type, token_string = tok[0], tok[1]
        start_line, start_col = tok[2]
        end_line, end_col = tok[3]
        if start_line > last_lineno:
            last_col = 0
        if start_col > last_col:
            # Padding is emitted even ahead of a skipped token.
            pieces.append(" " * (start_col - last_col))
        if token_type == tokenize.COMMENT:
            pass
        elif token_type == tokenize.STRING:
            follows_statement_start = prev_toktype in (tokenize.INDENT, tokenize.NEWLINE)
            if not follows_statement_start and start_col > 0:
                pieces.append(token_string)
        else:
            pieces.append(token_string)
        prev_toktype = token_type
        last_col = end_col
        last_lineno = end_line
    return "".join(pieces)


def remove_comments_and_docstrings(source,lang):
    """Return ``source`` minus comments (and, for Python, docstrings).

    Args:
        source: The source code text.
        lang: Language name.  ``'python'`` uses the tokenizer-based stripper,
            ``'ruby'`` returns ``source`` unchanged, and any other value is
            treated as a language with ``//`` and ``/* */`` comments.

    Returns:
        The stripped source with blank lines removed (``source`` itself for
        ``'ruby'``).

    Raises:
        Whatever ``tokenize.generate_tokens`` raises when ``lang`` is
        ``'python'`` and ``source`` cannot be tokenized.
    """
    if lang in ['python']:
        return _drop_blank_lines(_strip_python_comments_and_docstrings(source))
    if lang in ['ruby']:
        return source
    return _drop_blank_lines(_C_STYLE_COMMENT_OR_STRING.sub(_blank_out_comment, source))



def remove_comment(code_blocks):
    """Strip ``/* ... */`` comments from every entry of ``code_blocks`` in place.

    Each entry is replaced by the result of deleting all ``comment_regex``
    matches from it.  The same (mutated) sequence is returned.
    """
    for index, block in enumerate(code_blocks):
        code_blocks[index] = comment_regex.sub("", block)
    return code_blocks

def ast_chk(content ):
    """Check whether ``content`` parses as Python source.

    Args:
        content: Source text handed to ``ast.parse``.

    Returns:
        ``(True, None)`` when parsing succeeds, otherwise ``(False, err)``
        where ``err`` is the formatted traceback of the parse failure.
    """
    # Imported locally because the module's import block does not provide it;
    # without this the failure path raised NameError instead of reporting.
    import traceback

    try:
        ast.parse(source=content)
    except Exception:
        return False, traceback.format_exc()
    return True, None


def extract_code_from_markdown(markdown  ):
    """Collect the contents of triple-backtick fenced blocks in ``markdown``.

    A line whose left-stripped text starts with three backticks toggles
    between "inside" and "outside" a block; the fence lines themselves are not
    part of the output.  A block left open at the end of the text is still
    collected.  Blocks with no lines between their fences are skipped.

    Args:
        markdown: The markdown text to scan.

    Returns:
        A list with one string per non-empty block (lines joined with
        ``"\\n"``), or ``None`` when no block content was found.
    """
    inside_fence = False
    blocks = []
    current = []
    for line in markdown.split("\n"):
        if line.lstrip().startswith("```"):
            inside_fence = not inside_fence
            # A fence line ends whatever block was being accumulated.
            if current:
                blocks.append("\n".join(current))
                current = []
            continue
        if inside_fence:
            current.append(line)

    # Unterminated trailing block.
    if current:
        blocks.append("\n".join(current))

    # Previously the collected blocks were only returned when the last fence
    # was left open; properly closed blocks were discarded.
    return blocks or None

def extract_code_from_markdown_v2(markdown  ):
    """Return the bodies of all fenced code blocks found in ``markdown``.

    When the text contains an odd number of triple-backtick markers, a closing
    marker is appended first so that a final unterminated block is still
    matched.  The opening fence must be followed by a newline (anything
    between the backticks and that newline is skipped); each returned string
    is the text between that newline and the next triple backtick.

    Returns:
        A list of block bodies, empty when nothing matches.
    """
    fence_pattern = re.compile(
        r"(?P<start>```[^\n]*\n)(?P<code>[\s\S]*?)(?P<end>```)",
        re.DOTALL|re.IGNORECASE | re.MULTILINE,
    )

    text = markdown
    if text.count("```") % 2:
        text = text + "```"

    return [found.group("code") for found in fence_pattern.finditer(text.strip())]


# def extract_code_from_markdown(markdown  ):
#
#     code_matches = code_regex3.findall(markdown)
#     if len(code_matches) <=0 :
#         return [markdown] 
#
#     code_matches = [non_tuple_x.strip() for non_tuple_x in code_matches ] # tuple_x -> ("php", code_block)
#     code_matches = [x for x in code_matches if len(x.strip())>0 ]
#
#     ret_code_matches= []
#     for x in code_matches :
#         x_sub  = code_regex_fail.findall(x)
#         x_sub  ="\n".join( [y.strip() for y in x_sub] )
#         ret_code_matches.append( x_sub )
#
#     return ret_code_matches 
#
#     # return "\n\n".join( code_matches )
    

# from  pytest_codeblocks  import extract_from_buffer 
# import  io 
# def extract_code_from_markdown_v2(markdown   ):
#     find_list= []
#     if  not (  (hasattr(markdown, 'read') and hasattr(markdown, 'write')) ):
#             with io.StringIO() as f_mock:
#                 f_mock.write(markdown )
#                 f_mock.seek(0)
#                 find_list=   extract_from_buffer(f_mock )
#     else:
#         find_list=  extract_from_buffer(markdown)
#
#     final_ret = [c.code  for c in find_list ] 
#     return final_ret 
#     # return "\n".join(final_ret  )
#





# copy from https://raw.githubusercontent.com/dhruvmanila/remove-print-statements/main/remove_print_statements.py
# pip install libcst 

from typing import Mapping

import libcst as cst
import libcst.matchers as m
from libcst.codemod import (
    CodemodContext,
    ContextAwareTransformer,
    TransformExit,
    TransformFailure,
    TransformSuccess,
    transform_module,
)
from libcst.metadata import PositionProvider


class RemovePrintStatements(ContextAwareTransformer):
    """libcst codemod transformer that removes ``print(...)`` statements.

    Both ``visit_Expr`` and ``leave_Expr`` are gated by
    ``m.call_if_inside(PRINT_STATEMENT)``: the visit hook counts, the leave
    hook returns ``cst.RemoveFromParent()``.

    NOTE(review): ``dry_run`` and ``verbose`` are stored on the instance, but
    the code that acted on them is commented out below, so in this version
    they have no effect and ``print_statements`` stays empty.
    """
    DESCRIPTION: str = "Remove all the print statements"
    METADATA_DEPENDENCIES = (PositionProvider,)

    # Matcher for an expression statement whose value is a call to the bare
    # name `print` (so `obj.print(...)` is not covered by this matcher).
    PRINT_STATEMENT = m.Expr(
        value=m.Call(
            func=m.Name(
                value="print",
            ),
        ),
    )

    def __init__(
        self, context: CodemodContext, *, dry_run: bool = False, verbose: bool = False
    ) -> None:
        """Store the options and reset the counters.

        Args:
            context: Codemod context forwarded to the base transformer.
            dry_run: Stored only; not consulted by the active code.
            verbose: Stored only; not consulted by the active code.
        """
        super().__init__(context)
        self.dry_run = dry_run
        self.verbose = verbose
        # Incremented by visit_Expr; callers read it to see whether anything matched.
        self.print_statement_count = 0
        # Line number -> source text of each print statement; never filled in
        # while the verbose branch in visit_Expr is disabled.
        self._print_statements: dict[int, str] = {}

    @property
    def print_statements(self) -> Mapping[int, str]:
        """Return the recorded print statements keyed by line number.

        The mapping is only written by the commented-out verbose branch of
        ``visit_Expr``, so it is currently always empty.
        """
        return self._print_statements

    @m.call_if_inside(PRINT_STATEMENT)
    def visit_Expr(self, node: cst.Expr) -> None:
        """Count an expression statement visited inside a ``PRINT_STATEMENT`` match."""
        self.print_statement_count += 1
        # Disabled: recording of the statement's source text per line number.
        # if self.verbose:
        #     pos = self.get_metadata(PositionProvider, node, None)
        #     if pos is not None:
        #         self._print_statements[pos.start.line] = self.module.code_for_node(node)

    @m.call_if_inside(PRINT_STATEMENT)
    def leave_Expr(
        self, original_node: cst.Expr, updated_node: cst.Expr
    ) -> cst.Expr :# NOTE(review): the value returned is cst.RemoveFromParent(), not an Expr
        """Remove the expression statement from its parent node."""
        # Disabled: dry-run mode that kept the node in place.
        # if self.dry_run:
        #     return updated_node
        return cst.RemoveFromParent()



def remove_pattern_print(
    code: str,
    # dry_run: bool = False,
    verbose: bool = False,
) -> str:
    """Return ``code`` with its ``print(...)`` statements removed.

    Runs ``RemovePrintStatements`` over ``code`` via ``transform_module``.

    Args:
        code: Python source text.
        verbose: Forwarded to ``RemovePrintStatements``.

    Returns:
        The transformed source when the transform succeeded and at least one
        print statement was counted; otherwise ``code`` unchanged (including
        when the transform did not succeed).
    """
    codemod = RemovePrintStatements(
        # The filename is a placeholder; no file is read here, the source is
        # passed in through `code`.
        context=CodemodContext(filename="123.py"),
        verbose=verbose,
    )
    result = transform_module(codemod, code=code)
    if isinstance(result, TransformSuccess) and codemod.print_statement_count:
        return result.code
    return code


import re 
# A line whose first non-blank token is the `assert` keyword.  The trailing
# word boundary keeps names such as `assertion` or `assert_ok` from matching.
_ASSERT_LINE = re.compile(r"^[ \t]*assert\b")


def remove_pattern_line_startwith_assert(
    code: str,
) -> str :
    """Return ``code`` without the lines that start with an ``assert`` statement.

    A line is dropped when, ignoring leading spaces and tabs, it begins with
    the keyword ``assert``.  All other lines, and their original line endings,
    are kept as they are.

    NOTE(review): the filter is purely line-based.  Continuation lines of a
    multi-line ``assert`` are not removed, and a block whose only statement
    was an ``assert`` is left without a body.

    Args:
        code: Python source text.

    Returns:
        The filtered source text.
    """
    kept_lines = [
        line
        for line in code.splitlines(keepends=True)
        if not _ASSERT_LINE.match(line)
    ]
    return "".join(kept_lines)




# The phrases "AI language model" / "a language model" (matched case-
# insensitively by the function below).  The previous `[ai|a]+` was a
# character class, so it accepted any run of 'a', 'i' or '|' -- including the
# tail of an unrelated word such as "Llama".
finding = r"\b(?:ai|a)\slanguage\smodel"
def cognise_as_ai_language_model(content):
    """Tell whether ``content`` mentions being "a(n AI) language model".

    Args:
        content: Text to search.

    Returns:
        ``True`` when ``finding`` matches anywhere in ``content`` (ignoring
        case), otherwise ``False``.
    """
    return re.search(finding, content, flags=re.IGNORECASE|re.MULTILINE) is not None


