Skip to content

Add on-demand package data collection for golang #596 #608

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
50 changes: 50 additions & 0 deletions minecode/collectors/bitbucket.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging

import requests


"""
Collect bitbucket packages from bitbucket registries.
"""

# Module-level logger named after this module. A StreamHandler is attached and
# the level is set to INFO as a side effect of importing the module.
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
logger.addHandler(handler)
logger.setLevel(logging.INFO)


def bitbucket_get_all_package_version_author(subset_path):
    """
    Return a list of (version, author) tuples, one per tag of the Bitbucket
    repository identified by ``subset_path``.

    ``subset_path`` is interpolated as-is into the Bitbucket 2.0
    ``/repositories/{subset_path}/refs/tags`` endpoint (presumably a
    "workspace/repo_slug" string; confirm against the caller).

    ``version`` is the tag name. ``author`` is the display name of the
    Bitbucket account linked to the tagged commit author, or an empty string
    when the tag has no target, no author, or the author is not linked to an
    account.

    Every result page is fetched by following the "next" link of each
    response. If any request fails with an HTTP error status, the error is
    logged and None is returned. Other ``requests`` exceptions (connection
    errors, timeouts) propagate to the caller.
    """
    repo_tags = f"https://api.bitbucket.org/2.0/repositories/{subset_path}/refs/tags"
    version_author_list = []
    try:
        while repo_tags:
            # A timeout keeps an unresponsive server from blocking forever.
            response = requests.get(repo_tags, timeout=30)
            response.raise_for_status()
            data = response.json()
            # "size" is an optional field of Bitbucket paginated responses, so
            # iterate over "values" directly instead of gating on it.
            for item in data.get("values") or []:
                version = item["name"]
                author = ""
                target_author = (item.get("target") or {}).get("author") or {}
                if target_author.get("type") == "author":
                    # "user" is absent when the commit author does not map to
                    # a Bitbucket account; fall back to an empty author.
                    user = target_author.get("user") or {}
                    author = user.get("display_name", "")
                version_author_list.append((version, author))
            # Handle pagination: the last page carries no "next" link.
            repo_tags = data.get("next")
        return version_author_list
    except requests.exceptions.HTTPError as err:
        logger.error(f"HTTP error occurred: {err}")
        return None
5 changes: 4 additions & 1 deletion minecode/collectors/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def packagedata_from_dict(package_data):
return PackageData.from_data(cleaned_package_data)


def map_fetchcode_supported_package(package_url, pipelines, priority=0):
def map_fetchcode_supported_package(package_url, pipelines, priority=0, from_go_lang=False):
"""
Add a `package_url` supported by fetchcode to the PackageDB.

Expand All @@ -116,6 +116,9 @@ def map_fetchcode_supported_package(package_url, pipelines, priority=0):
logger.error(error)
return error

if from_go_lang:
packages[0].type = "golang"
packages[0].namespace = "github.com/" + packages[0].namespace
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@chinyeungli could there be golang packages not from github?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Only golang packages hosted on GitHub go through this map_fetchcode_supported_package() function.
All other golang packages use map_golang_package().

package_data = packages[0].to_dict()

# Remove obsolete Package fields see https://github.com/aboutcode-org/fetchcode/issues/108
Expand Down
34 changes: 34 additions & 0 deletions minecode/collectors/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,40 @@
from minecode.collectors.generic import map_fetchcode_supported_package


def github_get_all_versions(subset_path):
    """
    Return a list of all tag names of a GitHub repository, fetched from the
    GitHub REST API.

    ``subset_path`` is interpolated as-is into the
    ``/repos/{subset_path}/tags`` endpoint (presumably an "owner/repo"
    string; confirm against the caller).

    Pages of up to 100 tags are requested until a page is empty or the
    response carries no "next" link. Any ``requests`` exception, including
    the HTTPError raised for an error status and timeouts, propagates to the
    caller.
    """
    import requests

    url = f"https://api.github.com/repos/{subset_path}/tags"
    version_list = []
    page = 1

    while True:
        response = requests.get(
            url,
            params={"page": page, "per_page": 100},  # Max 100 per page
            headers={"Accept": "application/vnd.github.v3+json"},
            # A timeout keeps an unresponsive server from blocking forever.
            timeout=30,
        )
        response.raise_for_status()

        data = response.json()
        if not data:
            break

        version_list.extend(tag["name"] for tag in data)

        # The Link header has no "next" relation on the last page.
        if "next" not in response.links:
            break
        page += 1

    return version_list


# Indexing GitHub PURLs requires a GitHub API token.
# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`.
@priority_router.route("pkg:github/.*")
Expand Down
44 changes: 44 additions & 0 deletions minecode/collectors/gitlab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging

import requests


"""
Collect gitlab packages from gitlab registries.
"""

# Module-level logger named after this module. A StreamHandler is attached and
# the level is set to INFO as a side effect of importing the module.
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
logger.addHandler(handler)
logger.setLevel(logging.INFO)


def gitlab_get_all_package_version_author(subset_path):
    """
    Return a list of (version, author, author_email) tuples, one per tag of
    the gitlab.com project identified by ``subset_path``.

    ``subset_path`` is either a numeric project id or a project path. A
    "namespace/project" path is URL-encoded here as the GitLab API requires;
    an already encoded path ("namespace%2Fproject") is left unchanged.

    ``version`` is the tag name; ``author`` and ``author_email`` come from the
    commit the tag points to.

    Every result page is fetched by following the "next" relation of the
    response Link header. If any request fails with an HTTP error status, the
    error is logged and None is returned. Other ``requests`` exceptions
    (connection errors, timeouts) propagate to the caller.
    """
    # Encoding only the slashes is a no-op for numeric ids and for paths the
    # caller already encoded, so existing callers keep working.
    project = subset_path.strip("/").replace("/", "%2F")
    repo_tags = f"https://gitlab.com/api/v4/projects/{project}/repository/tags"
    # GitLab returns 20 items per page by default; ask for the maximum.
    params = {"per_page": 100}
    version_author_list = []
    try:
        while repo_tags:
            # A timeout keeps an unresponsive server from blocking forever.
            response = requests.get(repo_tags, params=params, timeout=30)
            response.raise_for_status()
            # Get all available versions on this page
            for item in response.json():
                version = item["name"]
                author = item["commit"]["author_name"]
                author_email = item["commit"]["author_email"]
                version_author_list.append((version, author, author_email))
            # Handle pagination: the "next" URL already carries its own query
            # string, so the initial params are not sent again.
            repo_tags = response.links.get("next", {}).get("url")
            params = None
        return version_author_list
    except requests.exceptions.HTTPError as err:
        logger.error(f"HTTP error occurred: {err}")
        return None
Loading