python3通过gitlab原生api来获取gitlab上的工程名、分支、提交日志
文章目录
背景
某天突然有个需求,要求统计gitlab上所有工程所有分支的提交日志,于是拿出python动手撸了段脚本。
脚本内容及注释
通过gitlab原生的api来获取当前账号有权限的所有工程、分支和每个分支提交日志,并把数据入库(sqlite);
用gitlab原生的api好处就是不用把代码拉下来,再通过git log来查看提交日志;网上大部分方法都是用git log来统计提交日志,但是如果工程和分支太多了就比较麻烦。
- python版本:3.6.0
# -*- coding:utf-8 -*-
import requests,json,sqlite3,uuid
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
}
#gitlab地址
git_url='http://x.x.x.x'
#gitlab的token
git_token='xxxxxxxxxxxxxxx'
session = requests.Session()
headers['PRIVATE-TOKEN']=git_token
session.headers = headers
git_login=session.get(git_url,headers=headers)
#获取当前账号有权限的全部工程列表
def gitlab_projects():
print("正在获取gitlab上工程数量...")
projects_api=git_url+'/api/v4/projects?simple=yes&per_page=20'
projects_headers=session.head(projects_api).headers
projects_num=int(projects_headers['X-Total'])
projects_pages=int(projects_headers['X-Total-Pages'])
# print(projects_headers)
print("工程总数:",projects_num)
cursor = conn.cursor()
cursor.execute('create table if not exists gitlab_projects(id varchar(8),name varchar(128),desc varchar(256),path varchar(128),create_at varchar(64),default_branch varchar(64),branch_num varchar(16),ssh_url_to_repo varchar(128),web_url varchar(128),PRIMARY KEY ("id"))')
cursor.execute('delete from gitlab_projects')
for i in range(projects_pages):
pages=i+1
projects_url=projects_api+'&page='+str(pages)
projects = session.get(projects_url).text
# print(projects)
projects_json = json.loads(projects)
for project_json in projects_json:
project_id=project_json['id']
project_name=project_json['name']
project_desc=project_json['description']
project_path=project_json['path_with_namespace']
project_create_at=project_json['created_at']
# project_create_at=project_create_at[0:19].replace('T',' ')
project_default_branch =project_json['default_branch']
project_ssh_url_to_repo=project_json['ssh_url_to_repo']
project_web_url=project_json['web_url']
cursor.execute('insert into gitlab_projects values(?,?,?,?,?,?,?,?,?)',(project_id,project_name,project_desc,project_path,project_create_at,project_default_branch,'null',project_ssh_url_to_repo,project_web_url))
cursor.close()
conn.commit()
print("工程获取完成")
#获取工程分支
def gitlab_project_branchs(project_list):
print("获取工程分支信息...")
cursor = conn.cursor()
cursor.execute('create table if not exists gitlab_project_branch(id varchar(8),name varchar(128),branch_name varchar(128))')
cursor.execute('delete from gitlab_project_branch')
project_list=project_list
for project_info in project_list:
project_id=project_info[0]
project_name=project_info[1]
# print(project_id,project_name)
project_branchs_api = git_url + '/api/v4/projects/'+project_id+'/repository/branches'
project_branchs=session.get(project_branchs_api).text
project_branchs_json=json.loads(project_branchs)
project_branchs_num=len(project_branchs_json)
# print(project_branchs_num)
for project_branchs in project_branchs_json:
prject_branch=project_branchs['name']
cursor.execute('insert into gitlab_project_branch values(?,?,?)',(project_id,project_name,prject_branch))
cursor.execute('update gitlab_projects set branch_num=? where id=?',(project_branchs_num,project_id))
cursor.close()
conn.commit()
print("分支信息获取完成")
#增量获取所有工程所有分支的提交日志
def gitlab_project_commits(project_list):
print("获取工程commit日志...")
cursor = conn.cursor()
cursor.execute('create table if not exists gitlab_project_commits(xmlid varchar(64),id varchar(8),name varchar(128),branch_name varchar(128),title varchar(512),additions varchar(8),deletions varchar(8),create_at varchar(32),author_name varchar(32),author_email varchar(64),PRIMARY KEY ("xmlid"))')
# cursor.execute('delete from gitlab_project_commits')
cursor.close()
project_list=project_list
for project_info in project_list:
project_id=project_info[0]
project_name=project_info[1]
# print(project_id,project_name)
cursor = conn.cursor()
cursor.execute('select branch_name from gitlab_project_branch where id=? and name=?',(project_id,project_name))
project_branch_list = cursor.fetchall()
for project_branch in project_branch_list:
project_branch_name=project_branch[0]
cursor.execute('select max(create_at) from gitlab_project_commits where id=? and branch_name=?',(project_id, project_branch_name))
max_create_at= cursor.fetchall()[0][0]
since = max_create_at
if since:
project_branchs_commit_api = git_url + '/api/v4/projects/' + project_id + '/repository/commits?per_page=50&ref_name='+project_branch_name+'&since='+since
else:
project_branchs_commit_api = git_url + '/api/v4/projects/' + project_id + '/repository/commits?per_page=50&ref_name='+ project_branch_name
# print(project_branchs_commit_api)
project_branchs_commit_headers = session.head(project_branchs_commit_api).headers
projects_num = int(project_branchs_commit_headers['X-Total'])
projects_pages = int(project_branchs_commit_headers['X-Total-Pages'])
print("正在增量获取" + project_name + "的"+project_branch_name+"分支的"+str(projects_num)+"条commit日志")
for i in range(projects_pages):
page=i+1
project_branchs_commit_api_page=project_branchs_commit_api+'&page='+str(page)
# print(project_branchs_commit_api_page)
project_branchs = session.get(project_branchs_commit_api_page).text
project_branchs_commit_json = json.loads(project_branchs)
for project_branch_commit_json in project_branchs_commit_json:
# print(project_branch_commit_json)
#id,name,branch_name,title ,create_at ,author_name,author_email
commit_add=0
commit_del=0
commit_id=project_branch_commit_json['id']
# title里面提交可能只有部分,因此使用message来获取完整的提交日志内容
# commit_title=project_branch_commit_json['title']
commit_title=project_branch_commit_json['message']
commit_create_at=project_branch_commit_json['created_at']
# commit_create_at=commit_create_at[0:19].replace('T',' ')
commit_author_name=project_branch_commit_json['author_name']
commit_author_email=project_branch_commit_json['author_email']
#start 获取每个commit的增加和删除行,太慢了,暂时注释掉
# project_branchs_commit_info_url=git_url+ '/api/v4/projects/' + project_id + '/repository/commits/'+commit_id
# project_branchs_commit_info_re=session.get(project_branchs_commit_info_url).text
# project_branchs_commit_info_json=json.loads(project_branchs_commit_info_re)
# commit_add=project_branchs_commit_info_json['stats']['additions']
# commit_del=project_branchs_commit_info_json['stats']['deletions']
#end
uuid_key=str(uuid.uuid1()).replace('-','')
cursor.execute('insert into gitlab_project_commits values(?,?,?,?,?,?,?,?,?,?)',(uuid_key,project_id,project_name,project_branch_name,commit_title,commit_add,commit_del,commit_create_at,commit_author_name,commit_author_email))
cursor.close()
conn.commit()
print("分支信息获取完成")
if __name__ == "__main__":
conn = sqlite3.connect('gitlab.db')
#获取gitlab上的工程列表
gitlab_projects()
#获取gitlab_projects的id和name
cursor = conn.cursor()
cursor.execute('select id,name from gitlab_projects')
project_list = cursor.fetchall()
#获取gitlab上每个工程的分支列表
gitlab_project_branchs(project_list)
#增量获取gitlab上每个工程的分支的commit记录
gitlab_project_commits(project_list)
conn.close()
#获取最近6个月的提交数据
#select name,branch_name,count(*) from gitlab_project_commits where date(create_at)>date(datetime('now','localtime'),'-6 month') group by name,branch_name;
#select date(datetime('now','localtime'),'-6 month');
- 备注
gitlab的api默认返回数据是20条,比如:http://x.x.x.x/api/v4/projects?simple=yes
返回的数据是20条;
可以通过per_page
参数来定义返回的数据条数,如果要返回30条数据:http://x.x.x.x/api/v4/projects?simple=yes&per_page=30
;
如果要查看所有的数据,需要查看api返回的headers里面的数据,X-Total
为数据的总条数,X-Total-Pages
为一共有多少页数据,如要以每页30条数据查看第2页的数据:http://x.x.x.x/api/v4/projects?simple=yes&per_page=30&page=2
;
结束
说实话统计代码的提交量是毫无意义的,主要还是要看代码质量;
代码有点烂,将就看了,主要是备份下,下次需要的时候再拿出来用。