
-- Splits a JSON array (given as text) into its elements, each re-serialized
-- as a JSON string, e.g. '[1,"a"]' -> ['1', '"a"'].
--
-- Returns NULL when the input is NULL or parses to something that is not a
-- JSON array; without these guards such inputs raise a TypeError inside the
-- UDF and abort the whole query. Malformed JSON still raises, so corrupt
-- input is not silently hidden.
CREATE TEMP FUNCTION json2array(json STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
  if (json === null || json === undefined) {
    return null;
  }
  const parsed = JSON.parse(json);
  if (!Array.isArray(parsed)) {
    return null;
  }
  return parsed.map(x => JSON.stringify(x));
""";

-- Selects merged pull requests with fewer than 5 commits that target the
-- main/master branch of public C / C++ / Objective-C / Objective-C++
-- repositories with more than 200 stars, from the GH Archive day tables
-- 2017-2022, keeping only those whose title or body talks about fixing a
-- bug-like problem.
--
-- NOTE: every column produced by JSON_EXTRACT is JSON-encoded, i.e. string
-- values keep their surrounding double quotes. This is why the filters below
-- compare against literals such as '"c"' and '"main"'.
WITH events AS (
    -- List only the columns used downstream instead of SELECT *, so the
    -- UNION ALL does not depend on every yearly table sharing a full schema.
    SELECT type, public, payload, repo FROM `githubarchive.day.2017*`
    UNION ALL
    SELECT type, public, payload, repo FROM `githubarchive.day.2018*`
    UNION ALL
    SELECT type, public, payload, repo FROM `githubarchive.day.2019*`
    UNION ALL
    SELECT type, public, payload, repo FROM `githubarchive.day.2020*`
    UNION ALL
    SELECT type, public, payload, repo FROM `githubarchive.day.2021*`
    UNION ALL
    SELECT type, public, payload, repo FROM `githubarchive.day.2022*`
),

merged_prs AS (
    SELECT
        repo,
        JSON_EXTRACT(payload, "$.pull_request") AS pr,
        -- Lower-cased title and body joined by three tabs. COALESCE is
        -- required: JSON_EXTRACT yields SQL NULL for a JSON null and CONCAT
        -- returns NULL if any argument is NULL, so a pull request with a null
        -- body would otherwise get a NULL message and be dropped by the
        -- final filter even when its title matches.
        LOWER(CONCAT(
            COALESCE(JSON_EXTRACT(payload, "$.pull_request.title"), ''),
            "\t\t\t",
            COALESCE(JSON_EXTRACT(payload, "$.pull_request.body"), '')
        )) AS message,
        JSON_EXTRACT(payload, "$.pull_request.base.repo.language") AS repo_language,
        JSON_EXTRACT(payload, "$.pull_request.base.repo.license.key") AS license
    FROM events
    WHERE type = "PullRequestEvent"
        AND CAST(JSON_EXTRACT(payload, "$.pull_request.commits") AS INT) < 5
        AND CAST(JSON_EXTRACT(payload, "$.pull_request.merged") AS BOOL) = TRUE
        AND LOWER(JSON_EXTRACT(payload, "$.pull_request.base.repo.language"))
            IN ('"c"', '"c++"', '"objective-c"', '"objective-c++"')
        AND LOWER(JSON_EXTRACT(payload, "$.pull_request.base.ref"))
            IN ('"main"', '"master"')
        AND CAST(JSON_EXTRACT(payload, "$.pull_request.base.repo.stargazers_count") AS INT) > 200
        AND CAST(public AS BOOL) = TRUE
        -- Keep repositories with no license key, or whose key contains one of
        -- the listed license-family substrings.
        AND (
            JSON_EXTRACT(payload, "$.pull_request.base.repo.license.key") IS NULL
            OR REGEXP_CONTAINS(
                LOWER(JSON_EXTRACT(payload, "$.pull_request.base.repo.license.key")),
                r'mit|artistic|isc|cc|epl|mpl|unlicense|apache|bsd|agpl|lgpl|bsd'
            )
        )
)

SELECT
    repo.id AS repo_id,
    repo.name AS repo_name,
    JSON_EXTRACT(pr, "$.merged_at") AS merged_at,
    JSON_EXTRACT(pr, "$.base.sha") AS base_sha,
    JSON_EXTRACT(pr, "$.head.sha") AS head_sha,
    JSON_EXTRACT(pr, "$.html_url") AS html_url,
    JSON_EXTRACT(pr, "$.diff_url") AS diff_url,
    JSON_EXTRACT(pr, "$.patch_url") AS patch_url,
    message,
    repo_language,
    license
FROM merged_prs
WHERE LENGTH(message) > 4
    -- The message must contain an action word ...
    AND (
        CONTAINS_SUBSTR(message, 'fix')
        OR CONTAINS_SUBSTR(message, 'solve')
        OR CONTAINS_SUBSTR(message, 'repair')
    )
    -- ... and a problem word. ' fault' keeps its leading space, presumably so
    -- that "default" does not match (TODO confirm intent).
    AND (
        CONTAINS_SUBSTR(message, 'bug')
        OR CONTAINS_SUBSTR(message, 'issue')
        OR CONTAINS_SUBSTR(message, 'problem')
        OR CONTAINS_SUBSTR(message, 'error')
        OR CONTAINS_SUBSTR(message, ' fault')
        OR CONTAINS_SUBSTR(message, 'vulnerab')
    )

  

      
        




