背景

某天突然有个需求,要求统计gitlab上所有工程所有分支的提交日志,于是拿出python动手撸了段脚本。

脚本内容及注释

通过gitlab原生的api来获取当前账号有权限的所有工程、分支和每个分支提交日志,并把数据入库(sqlite);

用gitlab原生的api好处就是不用把代码拉下来,再通过git log来查看提交日志;网上大部分方法都是用git log来统计提交日志,但是如果工程和分支太多了就比较麻烦。

  • python版本:3.6.0
# -*- coding:utf-8 -*-

import requests,json,sqlite3,uuid

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
}
#gitlab地址
git_url='http://x.x.x.x'
#gitlab的token
git_token='xxxxxxxxxxxxxxx'

session = requests.Session()
headers['PRIVATE-TOKEN']=git_token
session.headers = headers
git_login=session.get(git_url,headers=headers)

#获取当前账号有权限的全部工程列表
def gitlab_projects():
    print("正在获取gitlab上工程数量...")
    projects_api=git_url+'/api/v4/projects?simple=yes&per_page=20'
    projects_headers=session.head(projects_api).headers
    projects_num=int(projects_headers['X-Total'])
    projects_pages=int(projects_headers['X-Total-Pages'])
    # print(projects_headers)
    print("工程总数:",projects_num)
    cursor = conn.cursor()
    cursor.execute('create table if not exists gitlab_projects(id varchar(8),name varchar(128),desc varchar(256),path varchar(128),create_at varchar(64),default_branch varchar(64),branch_num varchar(16),ssh_url_to_repo varchar(128),web_url varchar(128),PRIMARY KEY ("id"))')
    cursor.execute('delete from gitlab_projects')
    for i in range(projects_pages):
        pages=i+1
        projects_url=projects_api+'&page='+str(pages)
        projects = session.get(projects_url).text
        # print(projects)
        projects_json = json.loads(projects)
        for project_json in projects_json:
            project_id=project_json['id']
            project_name=project_json['name']
            project_desc=project_json['description']
            project_path=project_json['path_with_namespace']
            project_create_at=project_json['created_at']
            # project_create_at=project_create_at[0:19].replace('T',' ')
            project_default_branch =project_json['default_branch']
            project_ssh_url_to_repo=project_json['ssh_url_to_repo']
            project_web_url=project_json['web_url']
            cursor.execute('insert into gitlab_projects values(?,?,?,?,?,?,?,?,?)',(project_id,project_name,project_desc,project_path,project_create_at,project_default_branch,'null',project_ssh_url_to_repo,project_web_url))
    cursor.close()
    conn.commit()
    print("工程获取完成")

#获取工程分支
def gitlab_project_branchs(project_list):
    print("获取工程分支信息...")
    cursor = conn.cursor()
    cursor.execute('create table if not exists gitlab_project_branch(id varchar(8),name varchar(128),branch_name varchar(128))')
    cursor.execute('delete from gitlab_project_branch')
    project_list=project_list
    for project_info in project_list:
        project_id=project_info[0]
        project_name=project_info[1]
        # print(project_id,project_name)
        project_branchs_api = git_url + '/api/v4/projects/'+project_id+'/repository/branches'
        project_branchs=session.get(project_branchs_api).text
        project_branchs_json=json.loads(project_branchs)
        project_branchs_num=len(project_branchs_json)
        # print(project_branchs_num)
        for project_branchs in project_branchs_json:
            prject_branch=project_branchs['name']
            cursor.execute('insert into gitlab_project_branch values(?,?,?)',(project_id,project_name,prject_branch))
        cursor.execute('update gitlab_projects set branch_num=? where id=?',(project_branchs_num,project_id))
    cursor.close()
    conn.commit()
    print("分支信息获取完成")

#增量获取所有工程所有分支的提交日志
def gitlab_project_commits(project_list):
    print("获取工程commit日志...")
    cursor = conn.cursor()
    cursor.execute('create table if not exists gitlab_project_commits(xmlid varchar(64),id varchar(8),name varchar(128),branch_name varchar(128),title varchar(512),additions varchar(8),deletions varchar(8),create_at varchar(32),author_name varchar(32),author_email varchar(64),PRIMARY KEY ("xmlid"))')
    # cursor.execute('delete from gitlab_project_commits')
    cursor.close()
    project_list=project_list
    for project_info in project_list:
        project_id=project_info[0]
        project_name=project_info[1]
        # print(project_id,project_name)
        cursor = conn.cursor()
        cursor.execute('select branch_name from gitlab_project_branch where id=? and name=?',(project_id,project_name))
        project_branch_list = cursor.fetchall()
        for project_branch in project_branch_list:
            project_branch_name=project_branch[0]
            cursor.execute('select max(create_at) from gitlab_project_commits where id=? and branch_name=?',(project_id, project_branch_name))
            max_create_at= cursor.fetchall()[0][0]
            since = max_create_at
            if since:
                project_branchs_commit_api = git_url + '/api/v4/projects/' + project_id + '/repository/commits?per_page=50&ref_name='+project_branch_name+'&since='+since
            else:
                project_branchs_commit_api = git_url + '/api/v4/projects/' + project_id + '/repository/commits?per_page=50&ref_name='+ project_branch_name
            # print(project_branchs_commit_api)
            project_branchs_commit_headers = session.head(project_branchs_commit_api).headers
            projects_num = int(project_branchs_commit_headers['X-Total'])
            projects_pages = int(project_branchs_commit_headers['X-Total-Pages'])
            print("正在增量获取" + project_name + "的"+project_branch_name+"分支的"+str(projects_num)+"条commit日志")
            for i in range(projects_pages):
                page=i+1
                project_branchs_commit_api_page=project_branchs_commit_api+'&page='+str(page)
                # print(project_branchs_commit_api_page)
                project_branchs = session.get(project_branchs_commit_api_page).text
                project_branchs_commit_json = json.loads(project_branchs)
                for project_branch_commit_json in project_branchs_commit_json:
                    # print(project_branch_commit_json)
                    #id,name,branch_name,title ,create_at ,author_name,author_email
                    commit_add=0
                    commit_del=0
                    commit_id=project_branch_commit_json['id']
                    # title里面提交可能只有部分,因此使用message来获取完整的提交日志内容
                    # commit_title=project_branch_commit_json['title']
                    commit_title=project_branch_commit_json['message']
                    commit_create_at=project_branch_commit_json['created_at']
                    # commit_create_at=commit_create_at[0:19].replace('T',' ')
                    commit_author_name=project_branch_commit_json['author_name']
                    commit_author_email=project_branch_commit_json['author_email']
                    #start 获取每个commit的增加和删除行,太慢了,暂时注释掉
                    # project_branchs_commit_info_url=git_url+ '/api/v4/projects/' + project_id + '/repository/commits/'+commit_id
                    # project_branchs_commit_info_re=session.get(project_branchs_commit_info_url).text
                    # project_branchs_commit_info_json=json.loads(project_branchs_commit_info_re)
                    # commit_add=project_branchs_commit_info_json['stats']['additions']
                    # commit_del=project_branchs_commit_info_json['stats']['deletions']
                    #end
                    uuid_key=str(uuid.uuid1()).replace('-','')
                    cursor.execute('insert into gitlab_project_commits values(?,?,?,?,?,?,?,?,?,?)',(uuid_key,project_id,project_name,project_branch_name,commit_title,commit_add,commit_del,commit_create_at,commit_author_name,commit_author_email))
        cursor.close()
        conn.commit()
    print("分支信息获取完成")

if __name__ == "__main__":
    conn = sqlite3.connect('gitlab.db')

    #获取gitlab上的工程列表
    gitlab_projects()

    #获取gitlab_projects的id和name
    cursor = conn.cursor()
    cursor.execute('select id,name from gitlab_projects')
    project_list = cursor.fetchall()

    #获取gitlab上每个工程的分支列表
    gitlab_project_branchs(project_list)

    #增量获取gitlab上每个工程的分支的commit记录
    gitlab_project_commits(project_list)
    conn.close()

#获取最近6个月的提交数据
#select name,branch_name,count(*) from gitlab_project_commits where date(create_at)>date(datetime('now','localtime'),'-6 month') group by name,branch_name;
#select date(datetime('now','localtime'),'-6 month');

  • 备注

gitlab的api默认返回数据是20条,比如:http://x.x.x.x/api/v4/projects?simple=yes返回的数据是20条;

可以通过per_page参数来定义返回的数据条数,如果要返回30条数据:http://x.x.x.x/api/v4/projects?simple=yes&per_page=30

如果要查看所有的数据,需要查看api返回的headers里面的数据,X-Total为数据的总条数,X-Total-Pages为一共有多少页数据,如要以每页30条数据查看第2页的数据:http://x.x.x.x/api/v4/projects?simple=yes&per_page=30&page=2

结束

说实话统计代码的提交量是毫无意义的,主要还是要看代码质量;

代码有点烂,将就看了,主要是备份下,下次需要的时候再拿出来用。