Instantly share code, notes, and snippets.

Embed
What would you like to do?

Queries used in article Popularity of frontend frameworks in GitHub repositories

  1. package-json-files
  2. date-of-modification
  3. package-json-files-with-date
  4. package-json-dependencies
  5. frameworks-in-dependencies

package-json-files

SELECT contents.id as id, contents.content as content, contents.copies as copies, files.repo_name as repo_name
FROM [bigquery-public-data:github_repos.contents] contents
INNER JOIN (SELECT * FROM (
  SELECT *
  FROM [bigquery-public-data:github_repos.files]
  WHERE path = 'package.json'
)) files ON contents.id = files.id 

date-of-modification

#standardSQL

SELECT repo_name, max(committer.date) as last_date
FROM 
  `bigquery-public-data.github_repos.commits` t1,
  t1.difference as difference,
  UNNEST(repo_name) as repo_name
WHERE
  difference.new_path LIKE 'package.json'
GROUP BY repo_name

package-json-files-with-date

SELECT files.id as id, files.content as content, files.copies as copies, files.repo_name as repo_name, modification_date.last_date as last_date
FROM [package-json-files] files
INNER JOIN [date-of-modification] modification_date
  ON modification_date.repo_name = files.repo_name

package-json-dependencies

SELECT id, repo_name, last_date, dependency, type, version
FROM getDependencies(
  SELECT
    id,
    repo_name,
    last_date,
    JSON_EXTRACT(content, '$.dependencies') AS dependencies,
    JSON_EXTRACT(content, '$.devDependencies') AS devDependencies
  FROM [package-json-files-with-date]
)
// The UDF
function getDependencies(row, emit) {
  emitResults(row, emit, 'dependencies');
  emitResults(row, emit, 'devDependencies');
}

function emitResults(row, emit, type) {
  if (row[type]) {
    x = JSON.parse(row[type])
    if (x !== null && typeof x === 'object') {
      Object.keys(x).forEach(function(entry) {
        if (validDependency(x, entry)) {     
          emit({
            id: row.id,
            repo_name: row.repo_name,
            last_date: row.last_date,
            repo_name: row.repo_name,
            dependency: entry,
            type: type,
            version: decodeDependencyHelper(x, entry)
          });
        }
      });
    }
  }
}

function validDependency(x, entry) {
  return entry != '' && decodeDependencyHelper(x, entry) != '';
}

function decodeDependencyHelper(x, entry) {
    var returnVal = x[entry];
    return typeof returnVal === 'string' ? returnVal : '' + returnVal;
}

bigquery.defineFunction(
  'getDependencies',  // Name used to call the function from SQL

  ['id', 'repo_name', 'last_date', 'dependencies', 'devDependencies'],  // Input column names

  // JSON representation of the output schema
  [
    {name: 'id', type: 'string'},
    {name: 'repo_name', type: 'string'},
    {name: 'last_date', type: 'timestamp'},
    {name: 'dependency', type: 'string'},
    {name: 'type', type: 'string'},
    {name: 'version', type: 'string'},
  ],

  getDependencies  // The function reference
);

frameworks-in-dependencies

SELECT 
  a.year_date as year_date,
  a.month_date as month_date,
  a.dependency as dependency,
  a.dependency_count as dependency_count,
  b.occurence as occurence,
  (a.dependency_count * 100) / b.occurence as percent
FROM (
    SELECT 
      YEAR(last_date) as year_date,
      MONTH(last_date) as month_date,
      dependency,
      COUNT(dependency) as dependency_count
    FROM (
      Select
        repo_name,
        dependency,
        max(last_date) as last_date,
        max(type) as type,
        max(version) as version,
        max(id) as id
      from [package-json-dependencies]
      where type = "dependencies"
      Group by repo_name, dependency 
    )
    WHERE repo_name in (
      SELECT repo_name
      FROM [package-json-dependencies]
      GROUP BY repo_name
    )
    GROUP BY year_date, month_date, dependency
) a
INNER JOIN (
    SELECT 
      YEAR(last_date) as year_date,
      MONTH(last_date) as month_date,
      count(distinct repo_name) as occurence
    FROM (
      Select 
        repo_name,
        dependency,
        max(last_date) as last_date,
        max(type) as type,
        max(version) as version,
        max(id) as id
      from [package-json-dependencies]
      where type = "dependencies"
      Group by repo_name, dependency 
    )
    WHERE repo_name in (
      SELECT repo_name
      FROM [package-json-dependencies]
      GROUP BY repo_name
    )
    GROUP BY year_date, month_date
) b ON a.year_date = b.year_date AND a.month_date = b.month_date
WHERE dependency IN (
    'jquery',
    'react',
    'backbone',
    '@angular/core',
    'vue',
    'angular',
)
ORDER BY year_date DESC, month_date DESC, percent DESC
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment