Deep Dive into a Project
Analyze detailed metrics for a specific project. New to OSO? Check out our Getting Started guide to set up your BigQuery or API access.
All projects are defined as YAML files in OSS Directory. View our current projects here.
Getting Started
Before running any analysis, you'll need to set up your environment:
- SQL
- Python
- GraphQL
If you haven't already, subscribe to OSO public datasets in BigQuery by clicking the "Subscribe" button on our Datasets page.
You can run all queries in this guide directly in the BigQuery console.
Start your Python notebook with the following:
from google.cloud import bigquery
import pandas as pd
import os
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = # PATH TO YOUR CREDENTIALS JSON
GCP_PROJECT = # YOUR GCP PROJECT NAME
client = bigquery.Client(GCP_PROJECT)
For more details on setting up Python notebooks, see our guide on writing Python notebooks.
The following queries should work if you copy-paste them into our GraphQL sandbox. For more information on how to use the GraphQL API, check out our GraphQL guide.
Find a Project
Search for projects by name:
- SQL
- Python
- GraphQL
select
project_id,
project_name,
display_name
from `oso_production.projects_v1`
where lower(display_name) like lower('%merkle%')
query = """
select
project_id,
project_name,
display_name
from `oso_production.projects_v1`
where lower(display_name) like lower('%merkle%')
"""
df = client.query(query).to_dataframe()
query FindProject {
oso_projectsV1(where: { display_name: { _ilike: "%merkle%" } }) {
projectId
projectName
displayName
}
}
Find a Project by Artifact
Find projects associated with specific artifacts:
- SQL
- Python
- GraphQL
select
project_id,
project_name,
artifact_namespace as github_owner,
artifact_name as github_repo
from `oso_production.artifacts_by_project_v1`
where
artifact_source = 'GITHUB'
and artifact_namespace like '%uniswap%'
query = """
select
project_id,
project_name,
artifact_namespace as github_owner,
artifact_name as github_repo
from `oso_production.artifacts_by_project_v1`
where
artifact_source = 'GITHUB'
and artifact_namespace like '%uniswap%'
"""
df = client.query(query).to_dataframe()
query FindProjectByArtifact {
oso_artifactsByProjectV1(
where: {
artifactNamespace: { _ilike: "%uniswap%" }
artifactSource: { _eq: "GITHUB" }
}
) {
projectId
projectName
artifactNamespace
artifactName
}
}
Code Metrics
Get code metrics for a specific project:
- SQL
- Python
- GraphQL
select
project_name,
display_name,
star_count,
fork_count,
commit_count_6_months,
contributor_count_6_months
from `oso_production.code_metrics_by_project_v1`
where project_name = 'opensource-observer'
query = """
select
project_name,
display_name,
star_count,
fork_count,
commit_count_6_months,
contributor_count_6_months
from `oso_production.code_metrics_by_project_v1`
where project_name = 'opensource-observer'
"""
df = client.query(query).to_dataframe()
query CodeMetricsForProject {
oso_codeMetricsByProjectV1(
where: { projectName: { _eq: "opensource-observer" } }
) {
projectName
displayName
starCount
forkCount
commitCount6Months
contributorCount6Months
}
}
Timeseries Metrics
Get historical metrics for a project:
- SQL
- Python
- GraphQL
select
tm.sample_date,
m.metric_name,
tm.amount
from `oso_production.timeseries_metrics_by_project_v0` as tm
join `oso_production.metrics_v0` as m
on tm.metric_id = m.metric_id
join `oso_production.projects_v1` as p
on tm.project_id = p.project_id
where p.project_name = 'wevm'
order by sample_date desc
query = """
select
tm.sample_date,
m.metric_name,
tm.amount
from `oso_production.timeseries_metrics_by_project_v0` as tm
join `oso_production.metrics_v0` as m
on tm.metric_id = m.metric_id
join `oso_production.projects_v1` as p
on tm.project_id = p.project_id
where p.project_name = 'wevm'
order by sample_date desc
"""
df = client.query(query).to_dataframe()
query TimeseriesMetrics {
timeseriesMetrics: oso_timeseriesMetricsByProjectV0(
where: {
projectId: { _eq: "Erx9J64anc8oSeN-wDKm0sojJf8ONrFVYbQ7GFnqSyc=" }
}
) {
sampleDate
metricId
amount
}
metrics: oso_metricsV0 {
metricId
metricName
}
}
Note: You'll need to join these results client-side by matching metricId
values from both queries.
Code Contributors
Get all contributors to a project's GitHub repositories:
- SQL
- Python
- GraphQL
select
te.time,
a.artifact_name as code_contributor,
abp.artifact_name as github_repo,
te.event_type,
te.amount
from `oso_production.timeseries_events_by_artifact_v0` as te
join `oso_production.artifacts_by_project_v1` as abp
on te.to_artifact_id = abp.artifact_id
join `oso_production.artifacts_v1` a
on te.from_artifact_id = a.artifact_id
where
abp.project_name = 'ipfs'
and te.event_type = 'COMMIT_CODE'
order by te.time desc
query = """
select
te.time,
a.artifact_name as code_contributor,
abp.artifact_name as github_repo,
te.event_type,
te.amount
from `oso_production.timeseries_events_by_artifact_v0` as te
join `oso_production.artifacts_by_project_v1` as abp
on te.to_artifact_id = abp.artifact_id
join `oso_production.artifacts_v1` a
on te.from_artifact_id = a.artifact_id
where
abp.project_name = 'ipfs'
and te.event_type = 'COMMIT_CODE'
order by te.time desc
"""
df = client.query(query).to_dataframe()
To get this data in GraphQL, you'll need to run multiple queries and join them client-side:
- First, get the project's artifacts:
query GetProjectArtifacts {
oso_artifactsByProjectV1(
where: { projectName: { _eq: "ipfs" } }
) {
artifactId
artifactName
}
}
- Then, for each artifact, get the events:
query GetArtifactEvents($artifactId: String!) {
oso_timeseriesEventsByArtifactV0(
where: {
toArtifactId: { _eq: $artifactId }
eventType: { _eq: "COMMIT_CODE" }
}
) {
time
fromArtifactId
eventType
amount
}
}
- Finally, get the contributor names:
query GetContributors($artifactIds: [String!]) {
oso_artifactsV1(
where: { artifactId: { _in: $artifactIds } }
) {
artifactId
artifactName
}
}
You'll need to combine these results client-side to get the complete picture.
Adding Projects
Projects are defined as YAML files in our OSS Directory repo. You can add or update your own projects or project artifacts by submitting a pull request.
For more information on how projects work, see our guide here.