istvanfazakas/Readme.md

## Readme.md

      
    Raw
  

              Readme.md
            
          
    Usage of collect_gql_data.rb Script

Description

The collect_gql_data.rb script collects data on the usage of the GraphQL RuboCop cops created by the Goldsmiths team (https://github.com/toptal/rubocop-toptal-graphql/) and pushes the results to a BigQuery table.
Requirements

The following gems are required:

google-cloud-bigquery (gem install google-cloud-bigquery)
em-http-request (gem install em-http-request)
byebug (gem install byebug)

The following ENV variable is required to run the script:

TT_WORK_PATH - the path to the folder that contains the checked-out Toptal projects

Usage


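TT_WORK_PATH=<path-to-toptal-projects>/ ruby ./collect_gql_data.rb "toptal.com:api-project-726361118046.GoldsmithsTeam.gql_standardization_metrics" "billing"

The first argument is the BigQuery destination table. The second argument is a comma-separated list of project names with no spaces, e.g. "platform,billing".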


## collect_gql_data.rb
#!/usr/bin/env ruby

require 'yaml'
require 'json'
require 'google/cloud/bigquery'
require 'csv'
require 'byebug'

module DataAnalyzer
  # Repositories that can be analysed, keyed by project name.
  #
  # Each entry holds:
  #   :path    - checkout location, built from the TT_WORK_PATH environment variable
  #   :schemas - GraphQL schema name => [application code path, spec path], both
  #              relative to :path; the paths are passed to RuboCop as targets
  PROJECTS = {
    'platform' => {
      path: "#{ENV['TT_WORK_PATH']}/platform",
      schemas: {
        'cas' => ['api/lib/graphql_api/cas/', 'spec/api/lib/graphql_api/cas/'],
        'client' => ['api/lib/graphql_api/client', 'spec/api/lib/graphql_api/client'],
        'platform' => ['api/lib/graphql_api/platform', 'spec/api/lib/graphql_api/platform'],
        'public' => ['api/lib/graphql_api/public', 'spec/api/lib/graphql_api/public'],
        # The spec path previously repeated the application path.
        'staff' => ['api/lib/graphql_api/staff', 'spec/api/lib/graphql_api/staff'],
        'talent' => ['api/lib/graphql_api/talent', 'spec/api/lib/graphql_api/talent'],
        'talent_public' => ['api/lib/graphql_api/talent_public', 'spec/api/lib/graphql_api/talent_public'],
        'community' => ['engines/community/app/graphql/', 'engines/community/spec/graphql'],
        'screening' => ['engines/screening/app/graphql', 'engines/screening/spec/graphql'],
        'talent_activation' => ['engines/talent_activation/app/graphql/', 'engines/talent_activation/spec/graphql'],
        'talent_profile' => ['engines/talent_profile/app/graphql/', 'engines/talent_profile/spec/graphql'],
        'talent_success' => ['engines/talent_success/app/graphql/', 'engines/talent_success/spec/graphql'],
        'topscreen' => ['engines/topscreen/app/graphql/', 'engines/topscreen/spec/graphql']
      }
    },
    'billing' => {
      path: "#{ENV['TT_WORK_PATH']}/billing",
      schemas: {
        'documents/staff' => ['app/graphql/documents/staff', 'spec/graphql/documents/staff'],
        'documents/talent' => ['app/graphql/documents/talent', 'spec/graphql/documents/talent'],
        'internal' => ['app/graphql/billing/gql/internal', 'spec/graphql/billing/gql/internal'],
        'staff' => ['app/graphql/billing/gql/staff', 'spec/graphql/billing/gql/staff'],
        'talent' => ['app/graphql/billing/gql/talent', 'spec/graphql/billing/gql/talent']
      }
    },
    'rti-platform' => {
      path: "#{ENV['TT_WORK_PATH']}/rti-platform",
      schemas: {
        'p2p/gql/staff' => ['app/graphql/p2p/gql/staff', 'spec/app/graphql/p2p/gql/staff'],
        'p2p/gql/client' => ['app/graphql/p2p/gql/client', 'spec/app/graphql/p2p/gql/client'],
        'p2p/gql/talent' => ['app/graphql/p2p/gql/talent', 'spec/app/graphql/p2p/gql/talent']
      }
    },
    'testing-platform-backend' => {
      path: "#{ENV['TT_WORK_PATH']}/testing-platform-backend",
      schemas: {
        'talent' => ['app/lib/graphql_api/talent', 'spec/lib/graphql_api/talent'],
        'staff' => ['app/lib/graphql_api/staff', 'spec/lib/graphql_api/staff'],
        'public' => ['app/lib/graphql_api/public', 'spec/lib/graphql_api/public']
      }
    },
    'topteam' => {
      path: "#{ENV['TT_WORK_PATH']}/topteam",
      schemas: {
        'topteam' => ['app/graphql', 'spec/graphql']
      }
    },
    'chronicles' => {
      path: "#{ENV['TT_WORK_PATH']}/chronicles",
      schemas: {
        # The application path was previously misspelled as 'app/graphql/stff'.
        'staff' => ['app/graphql/staff', 'spec/graphql/staff']
      }
    },
    'top-retro-board-backend' => {
      path: "#{ENV['TT_WORK_PATH']}/top-retro-board-backend",
      schemas: {
        'top-retro-board' => ['app/graphql', 'spec/graphql']
      }
    },
    'video-screening-backend' => {
      path: "#{ENV['TT_WORK_PATH']}/video-screening-backend",
      schemas: {
        'video-screening' => ['app/graphql', 'spec/graphql']
      }
    },
    'top-scheduler' => {
      path: "#{ENV['TT_WORK_PATH']}/top-scheduler",
      schemas: {
        'top-scheduler' => ['app/graphql', 'spec/graphql']
      }
    }
  }.freeze

  # Uploads formatted offense rows to a BigQuery table.
  class BigQuery # :nodoc:
    # Maximum number of rows sent in a single insert request.
    INSERT_BATCH_SIZE = 1000

    # @param project_args [String] destination in the form
    #   "<scope>:<project>.<dataset>.<table>"; the "<scope>:" prefix is optional
    # @raise [ArgumentError] if project, dataset or table cannot be extracted
    def initialize(project_args)
      # rpartition yields an empty separator when there is no "scope:" prefix,
      # so un-scoped destinations no longer fail on a nil tail.
      scope, separator, tail = project_args.to_s.rpartition(':')
      project, dataset_id, table_id = tail.split('.')
      if [project, dataset_id, table_id].any? { |part| part.nil? || part.empty? }
        raise ArgumentError, "Expected \"[<scope>:]<project>.<dataset>.<table>\", got #{project_args.inspect}"
      end

      @project_id = separator.empty? ? project : "#{scope}:#{project}"
      @dataset_id = dataset_id
      @table_id = table_id
    end

    # Inserts the rows into the configured table.
    #
    # @param formatted_data [Array<Hash>] rows to insert
    def call(formatted_data)
      insert_data formatted_data
      # export_csv formatted_data
    end

    private

    attr_reader :project_id, :table_id, :dataset_id

    # Writes the rows to "<ARGV[1]>.csv", using the first row's keys as header.
    # Does nothing when there are no rows.
    def export_csv(formatted_data)
      return if formatted_data.empty?

      keys = formatted_data.first.keys

      CSV.open("#{ARGV[1]}.csv", 'w') do |csv|
        csv << keys

        formatted_data.each { |data| csv << data.values }
      end
    end

    # Inserts the rows in batches. On the first failed batch the per-row
    # errors are printed and the process exits with status 1.
    def insert_data(formatted_data)
      p "-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-"
      p "Project ID: #{project_id}"
      p "Table ID: #{table_id}"
      p "Dataset ID: #{dataset_id}"
      p "-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-"

      formatted_data.each_slice(INSERT_BATCH_SIZE) do |slice|
        response = table.insert(slice)
        if response.success?
          print '.'
        else
          puts "Failed to insert #{response.error_rows.count} rows."
          # The error details used to be built and silently discarded.
          response.insert_errors.each { |err| puts "#{err.index} - #{err.errors}" }
          exit(1)
        end
      end
      p '-=-=-=-=-=-==-=-=-=-=-=-=-=-'
      p 'Data pushed to BigQuery'
      # p JSON.pretty_generate(formatted_data)
    end

    # @raise [ArgumentError] if the client returns no dataset for dataset_id
    def dataset
      @dataset ||= big_query.dataset(dataset_id) ||
                   raise(ArgumentError, "BigQuery dataset not found: #{dataset_id}")
    end

    # @raise [ArgumentError] if the dataset returns no table for table_id
    def table
      @table ||= dataset.table(table_id) ||
                 raise(ArgumentError, "BigQuery table not found: #{table_id}")
    end

    def big_query
      @big_query ||= Google::Cloud::Bigquery.new(project_id: project_id)
    end
  end

  # Runs the ToptalGraphql RuboCop cops against every GraphQL schema of the
  # requested projects and flattens the offense counts into rows.
  class RubocopOffensesCount # :nodoc: rubocop:disable Metrics/ClassLength
    CONFIG_FILE = '.rubocop_graphql.yml'.freeze
    TODO_CONFIG_FILE = '.rubocop_graphql_todo.yml'.freeze
    BACKUP_TODO_CONFIG_FILE = '.backup_rubocop_graphql_todo.yml'.freeze
    CLIENT_TODO_CONFIG_FILE = '.rubocop_graphql_client_todo.yml'.freeze
    BACKUP_CLIENT_TODO_CONFIG_FILE = '.backup_rubocop_graphql_client_todo.yml'.freeze

    # Validates the project names and runs the analysis. When a name is not a
    # key of PROJECTS, the valid names are printed and the process exits with
    # a failure status.
    #
    # @param only [String, Array<String>] project name(s) to analyse
    # @return [Array<Hash>] one row per project, schema and cop
    def self.call(only:)
      requested = Array(only)
      # A set difference keeps the check independent of the order in which the
      # projects were requested; comparing against an intersection rejected
      # valid lists such as "billing,platform".
      if (requested - PROJECTS.keys).empty?
        new(only: requested).call
      else
        warn "Invalid project name. Valid project are: #{PROJECTS.keys}"
        warn 'Please separate the project names only with single comma, and no space between.'
        exit(false)
      end
    end

    # @param only [String, Array<String>] project name(s) to analyse
    def initialize(only:)
      @only = Array(only)
      @cops_by_project = {}
    end

    # @return [Array<Hash>] one row per project, schema and cop
    def call
      format_data(process_schemas)
    end

    private

    attr_reader :gql_cops, :only, :config_file

    # Flattens { project => { schema => offense entries } } into a list of rows.
    def format_data(data)
      data.flat_map do |project, schemas|
        schemas.flat_map { |schema_name, cops| format(project, schema_name, cops) }
      end
    end

    # Builds one row per cop configured for +project+. The cop list is looked
    # up per project; reading the shared gql_cops here would apply the cops of
    # the last processed project to every project.
    #
    # NOTE(review): a cop without reported offenses is emitted with
    # cop_enabled: false, as before - confirm that is the intended meaning.
    def format(project, schema_name, cops)
      @cops_by_project.fetch(project).map do |cop|
        offended_cop = cops.detect { |cp| cp[:cop] == cop }

        resp = {
          github_repo_name: project, gql_schema: schema_name, cop_name: cop,
          cop_enabled: false, number_of_offenses_in_schema: 0, created_at: Time.now
        }

        next resp unless offended_cop

        resp.merge(cop_enabled: offended_cop[:enabled], number_of_offenses_in_schema: offended_cop[:count].to_i)
      end
    end

    # Collects the offense entries of every schema of every requested project.
    #
    # @return [Hash] { project => { schema => [{cop:, count:, enabled:}, ...] } }
    def process_schemas
      PROJECTS.slice(*only).each_with_object({}) do |(project, project_data), hash|
        project_path = project_data[:path]
        next if project_path.nil?

        Dir.chdir(project_path) do
          hash[project] = with_blank_todo_files(project_path) do
            load_config(project_path)
            @cops_by_project[project] = gql_cops.split(',')

            schema_counts(project_data[:schemas]).compact
          end
        end
      end
    end

    # Runs the block while the todo configs are replaced by empty files and
    # puts the originals back afterwards, even when the block raises.
    def with_blank_todo_files(project_path)
      backup_file(project_path, CLIENT_TODO_CONFIG_FILE, BACKUP_CLIENT_TODO_CONFIG_FILE)
      backup_file(project_path, TODO_CONFIG_FILE, BACKUP_TODO_CONFIG_FILE)
      yield
    ensure
      restore_file(project_path, CLIENT_TODO_CONFIG_FILE, BACKUP_CLIENT_TODO_CONFIG_FILE)
      restore_file(project_path, TODO_CONFIG_FILE, BACKUP_TODO_CONFIG_FILE)
    end

    # Reads the project's RuboCop GraphQL config and derives the comma-separated
    # list of ToptalGraphql cops that is passed to `rubocop --only`.
    def load_config(project_path)
      @config_file = YAML.load_file("#{project_path}/#{CONFIG_FILE}")
      @gql_cops = config_file.keys.select { |cop| cop.include?('ToptalGraphql') }.join(',')
    end

    # @return [Hash] { schema => offense entries } for the given schema map
    def schema_counts(schemas)
      schemas.each_with_object({}) do |(schema, schema_paths), counts|
        counts[schema] = get_schema_counts(schema, schema_paths)
      end
    end

    # Moves an existing config aside and leaves an empty file in its place.
    def backup_file(path, config_file_name, backup_file_name)
      config_path = "#{path}/#{config_file_name}"

      return unless File.file?(config_path)

      File.rename(config_path, "#{path}/#{backup_file_name}")
      # File.write closes its handle; File.new(..., 'w') left one open.
      File.write(config_path, '')
    end

    # Moves a previously backed-up config back to its original name.
    def restore_file(path, config_file_name, backup_file_name)
      backup_path = "#{path}/#{backup_file_name}"
      return unless File.file?(backup_path)

      config_path = "#{path}/#{config_file_name}"
      File.delete(config_path) if File.file?(config_path)
      File.rename(backup_path, config_path)
    end

    # Runs RuboCop with the "offenses" formatter on the schema's paths,
    # restricted to the GraphQL cops, and parses the output.
    #
    # NOTE(review): rbenv itself reads RBENV_VERSION - confirm that the
    # RUBY_VERSION variable is what the projects' tooling expects.
    def get_schema_counts(schema, schema_paths)
      ruby_version = `rbenv local`.strip
      offense_counts = `RUBY_VERSION=#{ruby_version} bundle exec rubocop #{schema_paths.join(' ')} --format offenses --only #{gql_cops}`.strip # rubocop:disable Layout/LineLength

      get_counts(offense_counts, schema, schema_paths)
    end

    # Parses "<count> <cop name>" lines; lines without a second column and the
    # "Total" summary line are skipped.
    #
    # @return [Array<Hash>] entries of the form {cop:, count:, enabled:}
    def get_counts(offense_counts, schema, schema_paths)
      offense_counts.split("\n").map do |count_data|
        (off_count, cop_name) = count_data.split(' ')
        next if cop_name.nil? || cop_name == 'Total'

        enabled = cop_enabled_for_schema?(cop_name, schema, schema_paths)
        {cop: cop_name, count: off_count, enabled: enabled}
      end.compact
    end

    # Tells whether +cop+ applies to the schema according to the loaded config:
    # false when explicitly disabled, true when it has no Include list, and
    # otherwise true when an Include pattern matches one of the schema paths.
    def cop_enabled_for_schema?(cop, schema, schema_paths)
      config = config_file[cop] || {}
      return false if config['Enabled'] == false

      include_data = config['Include']
      # A missing Include list means no path restriction. This also covers a
      # cop that has neither Enabled nor Include, which used to raise.
      return true if include_data.nil?

      schema_paths.any? do |schema_path|
        candidates = [
          schema_path,
          schema_path.gsub(schema, '**'),
          schema_path.gsub("#{schema}/app", '**')
        ]

        include_data.any? do |path|
          path.start_with?('**') || candidates.any? { |candidate| path.include?(candidate) }
        end
      end
    end
  end
end

# Script entry point. Expects two positional arguments:
#   ARGV[0] - BigQuery destination, e.g. the table named in the comment below
#   ARGV[1] - comma-separated project names (keys of DataAnalyzer::PROJECTS)
# TT_WORK_PATH must point at the folder containing the project checkouts.
if ARGV.length < 2 || ENV['TT_WORK_PATH'].to_s.empty?
  warn "Usage: TT_WORK_PATH=<path-to-projects> ruby #{$PROGRAM_NAME} <bigquery-table> <project>[,<project>...]"
  exit(false)
end

formatted_data = DataAnalyzer::RubocopOffensesCount.call(only: ARGV[1].split(','))
# toptal.com:api-project-726361118046.GoldsmithsTeam.gql_standardization_metrics
DataAnalyzer::BigQuery.new(ARGV[0]).call(formatted_data)
	#!/usr/bin/env ruby

	require 'yaml'
	require 'json'
	require 'google/cloud/bigquery'
	require 'csv'
	require 'byebug'

	module DataAnalyzer
	# Repositories that can be analysed, keyed by project name.
	#
	# Each entry holds:
	#   :path    - checkout location, built from the TT_WORK_PATH environment variable
	#   :schemas - GraphQL schema name => [application code path, spec path], both
	#              relative to :path; the paths are passed to RuboCop as targets
	PROJECTS = {
	  'platform' => {
	    path: "#{ENV['TT_WORK_PATH']}/platform",
	    schemas: {
	      'cas' => ['api/lib/graphql_api/cas/', 'spec/api/lib/graphql_api/cas/'],
	      'client' => ['api/lib/graphql_api/client', 'spec/api/lib/graphql_api/client'],
	      'platform' => ['api/lib/graphql_api/platform', 'spec/api/lib/graphql_api/platform'],
	      'public' => ['api/lib/graphql_api/public', 'spec/api/lib/graphql_api/public'],
	      # The spec path previously repeated the application path.
	      'staff' => ['api/lib/graphql_api/staff', 'spec/api/lib/graphql_api/staff'],
	      'talent' => ['api/lib/graphql_api/talent', 'spec/api/lib/graphql_api/talent'],
	      'talent_public' => ['api/lib/graphql_api/talent_public', 'spec/api/lib/graphql_api/talent_public'],
	      'community' => ['engines/community/app/graphql/', 'engines/community/spec/graphql'],
	      'screening' => ['engines/screening/app/graphql', 'engines/screening/spec/graphql'],
	      'talent_activation' => ['engines/talent_activation/app/graphql/', 'engines/talent_activation/spec/graphql'],
	      'talent_profile' => ['engines/talent_profile/app/graphql/', 'engines/talent_profile/spec/graphql'],
	      'talent_success' => ['engines/talent_success/app/graphql/', 'engines/talent_success/spec/graphql'],
	      'topscreen' => ['engines/topscreen/app/graphql/', 'engines/topscreen/spec/graphql']
	    }
	  },
	  'billing' => {
	    path: "#{ENV['TT_WORK_PATH']}/billing",
	    schemas: {
	      'documents/staff' => ['app/graphql/documents/staff', 'spec/graphql/documents/staff'],
	      'documents/talent' => ['app/graphql/documents/talent', 'spec/graphql/documents/talent'],
	      'internal' => ['app/graphql/billing/gql/internal', 'spec/graphql/billing/gql/internal'],
	      'staff' => ['app/graphql/billing/gql/staff', 'spec/graphql/billing/gql/staff'],
	      'talent' => ['app/graphql/billing/gql/talent', 'spec/graphql/billing/gql/talent']
	    }
	  },
	  'rti-platform' => {
	    path: "#{ENV['TT_WORK_PATH']}/rti-platform",
	    schemas: {
	      'p2p/gql/staff' => ['app/graphql/p2p/gql/staff', 'spec/app/graphql/p2p/gql/staff'],
	      'p2p/gql/client' => ['app/graphql/p2p/gql/client', 'spec/app/graphql/p2p/gql/client'],
	      'p2p/gql/talent' => ['app/graphql/p2p/gql/talent', 'spec/app/graphql/p2p/gql/talent']
	    }
	  },
	  'testing-platform-backend' => {
	    path: "#{ENV['TT_WORK_PATH']}/testing-platform-backend",
	    schemas: {
	      'talent' => ['app/lib/graphql_api/talent', 'spec/lib/graphql_api/talent'],
	      'staff' => ['app/lib/graphql_api/staff', 'spec/lib/graphql_api/staff'],
	      'public' => ['app/lib/graphql_api/public', 'spec/lib/graphql_api/public']
	    }
	  },
	  'topteam' => {
	    path: "#{ENV['TT_WORK_PATH']}/topteam",
	    schemas: {
	      'topteam' => ['app/graphql', 'spec/graphql']
	    }
	  },
	  'chronicles' => {
	    path: "#{ENV['TT_WORK_PATH']}/chronicles",
	    schemas: {
	      # The application path was previously misspelled as 'app/graphql/stff'.
	      'staff' => ['app/graphql/staff', 'spec/graphql/staff']
	    }
	  },
	  'top-retro-board-backend' => {
	    path: "#{ENV['TT_WORK_PATH']}/top-retro-board-backend",
	    schemas: {
	      'top-retro-board' => ['app/graphql', 'spec/graphql']
	    }
	  },
	  'video-screening-backend' => {
	    path: "#{ENV['TT_WORK_PATH']}/video-screening-backend",
	    schemas: {
	      'video-screening' => ['app/graphql', 'spec/graphql']
	    }
	  },
	  'top-scheduler' => {
	    path: "#{ENV['TT_WORK_PATH']}/top-scheduler",
	    schemas: {
	      'top-scheduler' => ['app/graphql', 'spec/graphql']
	    }
	  }
	}.freeze

	# Uploads formatted offense rows to a BigQuery table.
	class BigQuery # :nodoc:
	  # Maximum number of rows sent in a single insert request.
	  INSERT_BATCH_SIZE = 1000

	  # @param project_args [String] destination in the form
	  #   "<scope>:<project>.<dataset>.<table>"; the "<scope>:" prefix is optional
	  # @raise [ArgumentError] if project, dataset or table cannot be extracted
	  def initialize(project_args)
	    # rpartition yields an empty separator when there is no "scope:" prefix,
	    # so un-scoped destinations no longer fail on a nil tail.
	    scope, separator, tail = project_args.to_s.rpartition(':')
	    project, dataset_id, table_id = tail.split('.')
	    if [project, dataset_id, table_id].any? { |part| part.nil? || part.empty? }
	      raise ArgumentError, "Expected \"[<scope>:]<project>.<dataset>.<table>\", got #{project_args.inspect}"
	    end

	    @project_id = separator.empty? ? project : "#{scope}:#{project}"
	    @dataset_id = dataset_id
	    @table_id = table_id
	  end

	  # Inserts the rows into the configured table.
	  #
	  # @param formatted_data [Array<Hash>] rows to insert
	  def call(formatted_data)
	    insert_data formatted_data
	    # export_csv formatted_data
	  end

	  private

	  attr_reader :project_id, :table_id, :dataset_id

	  # Writes the rows to "<ARGV[1]>.csv", using the first row's keys as header.
	  # Does nothing when there are no rows.
	  def export_csv(formatted_data)
	    return if formatted_data.empty?

	    keys = formatted_data.first.keys

	    CSV.open("#{ARGV[1]}.csv", 'w') do |csv|
	      csv << keys

	      formatted_data.each { |data| csv << data.values }
	    end
	  end

	  # Inserts the rows in batches. On the first failed batch the per-row
	  # errors are printed and the process exits with status 1.
	  def insert_data(formatted_data)
	    p "-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-"
	    p "Project ID: #{project_id}"
	    p "Table ID: #{table_id}"
	    p "Dataset ID: #{dataset_id}"
	    p "-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-"

	    formatted_data.each_slice(INSERT_BATCH_SIZE) do |slice|
	      response = table.insert(slice)
	      if response.success?
	        print '.'
	      else
	        puts "Failed to insert #{response.error_rows.count} rows."
	        # The error details used to be built and silently discarded.
	        response.insert_errors.each { |err| puts "#{err.index} - #{err.errors}" }
	        exit(1)
	      end
	    end
	    p '-=-=-=-=-=-==-=-=-=-=-=-=-=-'
	    p 'Data pushed to BigQuery'
	    # p JSON.pretty_generate(formatted_data)
	  end

	  # @raise [ArgumentError] if the client returns no dataset for dataset_id
	  def dataset
	    @dataset ||= big_query.dataset(dataset_id) ||
	                 raise(ArgumentError, "BigQuery dataset not found: #{dataset_id}")
	  end

	  # @raise [ArgumentError] if the dataset returns no table for table_id
	  def table
	    @table ||= dataset.table(table_id) ||
	               raise(ArgumentError, "BigQuery table not found: #{table_id}")
	  end

	  def big_query
	    @big_query ||= Google::Cloud::Bigquery.new(project_id: project_id)
	  end
	end

	# Runs the ToptalGraphql RuboCop cops against every GraphQL schema of the
	# requested projects and flattens the offense counts into rows.
	class RubocopOffensesCount # :nodoc: rubocop:disable Metrics/ClassLength
	  CONFIG_FILE = '.rubocop_graphql.yml'.freeze
	  TODO_CONFIG_FILE = '.rubocop_graphql_todo.yml'.freeze
	  BACKUP_TODO_CONFIG_FILE = '.backup_rubocop_graphql_todo.yml'.freeze
	  CLIENT_TODO_CONFIG_FILE = '.rubocop_graphql_client_todo.yml'.freeze
	  BACKUP_CLIENT_TODO_CONFIG_FILE = '.backup_rubocop_graphql_client_todo.yml'.freeze

	  # Validates the project names and runs the analysis. When a name is not a
	  # key of PROJECTS, the valid names are printed and the process exits with
	  # a failure status.
	  #
	  # @param only [String, Array<String>] project name(s) to analyse
	  # @return [Array<Hash>] one row per project, schema and cop
	  def self.call(only:)
	    requested = Array(only)
	    # A set difference keeps the check independent of the order in which the
	    # projects were requested; comparing against an intersection rejected
	    # valid lists such as "billing,platform".
	    if (requested - PROJECTS.keys).empty?
	      new(only: requested).call
	    else
	      warn "Invalid project name. Valid project are: #{PROJECTS.keys}"
	      warn 'Please separate the project names only with single comma, and no space between.'
	      exit(false)
	    end
	  end

	  # @param only [String, Array<String>] project name(s) to analyse
	  def initialize(only:)
	    @only = Array(only)
	    @cops_by_project = {}
	  end

	  # @return [Array<Hash>] one row per project, schema and cop
	  def call
	    format_data(process_schemas)
	  end

	  private

	  attr_reader :gql_cops, :only, :config_file

	  # Flattens { project => { schema => offense entries } } into a list of rows.
	  def format_data(data)
	    data.flat_map do |project, schemas|
	      schemas.flat_map { |schema_name, cops| format(project, schema_name, cops) }
	    end
	  end

	  # Builds one row per cop configured for +project+. The cop list is looked
	  # up per project; reading the shared gql_cops here would apply the cops of
	  # the last processed project to every project.
	  #
	  # NOTE(review): a cop without reported offenses is emitted with
	  # cop_enabled: false, as before - confirm that is the intended meaning.
	  def format(project, schema_name, cops)
	    @cops_by_project.fetch(project).map do |cop|
	      offended_cop = cops.detect { |cp| cp[:cop] == cop }

	      resp = {
	        github_repo_name: project, gql_schema: schema_name, cop_name: cop,
	        cop_enabled: false, number_of_offenses_in_schema: 0, created_at: Time.now
	      }

	      next resp unless offended_cop

	      resp.merge(cop_enabled: offended_cop[:enabled], number_of_offenses_in_schema: offended_cop[:count].to_i)
	    end
	  end

	  # Collects the offense entries of every schema of every requested project.
	  #
	  # @return [Hash] { project => { schema => [{cop:, count:, enabled:}, ...] } }
	  def process_schemas
	    PROJECTS.slice(*only).each_with_object({}) do |(project, project_data), hash|
	      project_path = project_data[:path]
	      next if project_path.nil?

	      Dir.chdir(project_path) do
	        hash[project] = with_blank_todo_files(project_path) do
	          load_config(project_path)
	          @cops_by_project[project] = gql_cops.split(',')

	          schema_counts(project_data[:schemas]).compact
	        end
	      end
	    end
	  end

	  # Runs the block while the todo configs are replaced by empty files and
	  # puts the originals back afterwards, even when the block raises.
	  def with_blank_todo_files(project_path)
	    backup_file(project_path, CLIENT_TODO_CONFIG_FILE, BACKUP_CLIENT_TODO_CONFIG_FILE)
	    backup_file(project_path, TODO_CONFIG_FILE, BACKUP_TODO_CONFIG_FILE)
	    yield
	  ensure
	    restore_file(project_path, CLIENT_TODO_CONFIG_FILE, BACKUP_CLIENT_TODO_CONFIG_FILE)
	    restore_file(project_path, TODO_CONFIG_FILE, BACKUP_TODO_CONFIG_FILE)
	  end

	  # Reads the project's RuboCop GraphQL config and derives the comma-separated
	  # list of ToptalGraphql cops that is passed to `rubocop --only`.
	  def load_config(project_path)
	    @config_file = YAML.load_file("#{project_path}/#{CONFIG_FILE}")
	    @gql_cops = config_file.keys.select { |cop| cop.include?('ToptalGraphql') }.join(',')
	  end

	  # @return [Hash] { schema => offense entries } for the given schema map
	  def schema_counts(schemas)
	    schemas.each_with_object({}) do |(schema, schema_paths), counts|
	      counts[schema] = get_schema_counts(schema, schema_paths)
	    end
	  end

	  # Moves an existing config aside and leaves an empty file in its place.
	  def backup_file(path, config_file_name, backup_file_name)
	    config_path = "#{path}/#{config_file_name}"

	    return unless File.file?(config_path)

	    File.rename(config_path, "#{path}/#{backup_file_name}")
	    # File.write closes its handle; File.new(..., 'w') left one open.
	    File.write(config_path, '')
	  end

	  # Moves a previously backed-up config back to its original name.
	  def restore_file(path, config_file_name, backup_file_name)
	    backup_path = "#{path}/#{backup_file_name}"
	    return unless File.file?(backup_path)

	    config_path = "#{path}/#{config_file_name}"
	    File.delete(config_path) if File.file?(config_path)
	    File.rename(backup_path, config_path)
	  end

	  # Runs RuboCop with the "offenses" formatter on the schema's paths,
	  # restricted to the GraphQL cops, and parses the output.
	  #
	  # NOTE(review): rbenv itself reads RBENV_VERSION - confirm that the
	  # RUBY_VERSION variable is what the projects' tooling expects.
	  def get_schema_counts(schema, schema_paths)
	    ruby_version = `rbenv local`.strip
	    offense_counts = `RUBY_VERSION=#{ruby_version} bundle exec rubocop #{schema_paths.join(' ')} --format offenses --only #{gql_cops}`.strip # rubocop:disable Layout/LineLength

	    get_counts(offense_counts, schema, schema_paths)
	  end

	  # Parses "<count> <cop name>" lines; lines without a second column and the
	  # "Total" summary line are skipped.
	  #
	  # @return [Array<Hash>] entries of the form {cop:, count:, enabled:}
	  def get_counts(offense_counts, schema, schema_paths)
	    offense_counts.split("\n").map do |count_data|
	      (off_count, cop_name) = count_data.split(' ')
	      next if cop_name.nil? || cop_name == 'Total'

	      enabled = cop_enabled_for_schema?(cop_name, schema, schema_paths)
	      {cop: cop_name, count: off_count, enabled: enabled}
	    end.compact
	  end

	  # Tells whether +cop+ applies to the schema according to the loaded config:
	  # false when explicitly disabled, true when it has no Include list, and
	  # otherwise true when an Include pattern matches one of the schema paths.
	  def cop_enabled_for_schema?(cop, schema, schema_paths)
	    config = config_file[cop] || {}
	    return false if config['Enabled'] == false

	    include_data = config['Include']
	    # A missing Include list means no path restriction. This also covers a
	    # cop that has neither Enabled nor Include, which used to raise.
	    return true if include_data.nil?

	    schema_paths.any? do |schema_path|
	      candidates = [
	        schema_path,
	        schema_path.gsub(schema, '**'),
	        schema_path.gsub("#{schema}/app", '**')
	      ]

	      include_data.any? do |path|
	        path.start_with?('**') || candidates.any? { |candidate| path.include?(candidate) }
	      end
	    end
	  end
	end
	end

	# Script entry point. Expects two positional arguments:
	#   ARGV[0] - BigQuery destination, e.g. the table named in the comment below
	#   ARGV[1] - comma-separated project names (keys of DataAnalyzer::PROJECTS)
	# TT_WORK_PATH must point at the folder containing the project checkouts.
	if ARGV.length < 2 || ENV['TT_WORK_PATH'].to_s.empty?
	  warn "Usage: TT_WORK_PATH=<path-to-projects> ruby #{$PROGRAM_NAME} <bigquery-table> <project>[,<project>...]"
	  exit(false)
	end

	formatted_data = DataAnalyzer::RubocopOffensesCount.call(only: ARGV[1].split(','))
	# toptal.com:api-project-726361118046.GoldsmithsTeam.gql_standardization_metrics
	DataAnalyzer::BigQuery.new(ARGV[0]).call(formatted_data)