Skip to content

Instantly share code, notes, and snippets.

@ranguard
Created June 20, 2016 20:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ranguard/b877a33dde9c2fed74ae3b51a700d687 to your computer and use it in GitHub Desktop.
Save ranguard/b877a33dde9c2fed74ae3b51a700d687 to your computer and use it in GitHub Desktop.
some code for fetching from cpan
#!/usr/bin/env perl
# PODNAME: fetch_dist_versions_for_tests.pl
#
# Takes a distribution (e.g. Data-Page), looks up all of its releases,
# downloads and extracts each one, and writes a config that works with
# CPAN::Faker to /tmp/mc-api-test-builder/configs/<release name>.json.
# Copy the configs you want into the testing data dir
# 'test-data/fakecpan/configs/from_cpan'.
#### USAGE:
# Use the real content
# ./bin/run bin/fetch_dist_versions_for_tests.pl Data-Page
# Use minimal content as you only care about the name
# ./bin/run bin/fetch_dist_versions_for_tests.pl Data-Page 0
use strictures 1;
use Cpanel::JSON::XS;
use YAML;
use IO::All;
use Email::Address;
use File::Find;
use File::Spec;
use Path::Class;
use HTTP::Tiny;
use DDP;
use Search::Elasticsearch;
# Distribution to process (required first argument), e.g. Data-Page
my $dist_name = $ARGV[0] || die 'Supply a distribution, e.g. Data-Page';

# Include the real file's content in the fake data, or just a placeholder
my $include_content = $ARGV[1] // 1;

# play area
my $dump_dir   = dir('/tmp/mc-api-test-builder');
my $config_dir = $dump_dir->subdir('configs');    # output stored here
my $source_dir = $dump_dir->subdir('source');     # downloads / extracts

# Go make all the dirs
$config_dir->mkpath;
$source_dir->mkpath;

# tar extracts into the current directory, so a failed chdir would scatter
# files somewhere unexpected - stop instead.
chdir $source_dir->stringify
    or die "Unable to chdir to $source_dir: $!";

# Fetch all the dists for $dist_name
my $dist_list = _fetch_dist_list();
my $http      = HTTP::Tiny->new();

for my $dist_version ( @{$dist_list} ) {
    my $dist_info = $dist_version->{fields};
    my $url       = $dist_info->{download_url}->[0];
    my $basename  = file($url)->basename;

    # Only gzipped tarballs are handled. The match is anchored so it agrees
    # with the extension stripping further down.
    if ( $basename !~ /\.tar\.gz\z/ ) {
        warn "Unsupported compression $basename skipping";
        next;
    }

    # Download and uncompress
    my $file     = $source_dir->file($basename);
    my $response = $http->mirror( $url, "$file" );
    unless ( $response->{success} ) {
        warn "Unable to mirror $url";
        next;    # nothing was downloaded, so there is nothing to extract
    }

    # Extract it. List form avoids the shell, so odd characters in the
    # file name cannot be interpreted as shell syntax.
    if ( system( '/bin/tar', '-xf', "$file" ) != 0 ) {
        warn "Unable to extract $file";
        next;
    }

    # Clean off extensions: the tarball is assumed to unpack into a directory
    # named after the archive.
    ( my $extracted_dir = "$file" ) =~ s/\.tar\.gz\z//;

    # Run the dir
    _run_dir( $dist_info, $extracted_dir );
}

print
    "Edit/copy files from $config_dir to test-data/fakecpan/configs/from_cpan\n";
# Build the JSON config for one extracted release and save it.
#
# Arguments:
#   $dist_info      - hashref of release fields; every value is an arrayref
#                     and only its first element is used
#   $dir_to_process - path of the directory the release was extracted into
#
# Reads the file-level $include_content (real file content vs. placeholder)
# and $config_dir (where "<release name>.json" is written). Prints the path
# of the file written.
sub _run_dir {
    my ( $dist_info, $dir_to_process ) = @_;

    # Always an array, so 'append' is encoded as [] rather than null when
    # no files are found.
    my @files;

    if ( -d $dir_to_process ) {
        File::Find::find(
            {
                no_chdir => 1,
                wanted   => sub {

                    # With no_chdir, $File::Find::name is the same path
                    # that $_ holds; naming it keeps the callback readable.
                    my $path = $File::Find::name;
                    return unless -f $path;
                    push @files,
                        {
                        file => File::Spec->abs2rel(
                            $path, dir($dir_to_process)
                        ),
                        content => (
                            $include_content
                            ? io($path)->all
                            : '-- files content see fetch_dist_versions_for_tests.pl --'
                        ),
                        };
                },
            },
            "$dir_to_process"
        );
    }
    else {
        # Still write the config below: the name/version metadata is useful
        # even without a file list.
        warn "No extracted directory at $dir_to_process";
    }

    my $output = {
        name           => $dist_info->{distribution}->[0],
        version        => $dist_info->{version}->[0],
        abstract       => $dist_info->{abstract}->[0] || 'ABSTRACT goes here',
        X_Module_Faker => {
            cpan_author => $dist_info->{author}->[0],
            append      => \@files,
        },
    };

    my $config            = Cpanel::JSON::XS->new->pretty->encode($output);
    my $dist_version_name = $dist_info->{name}->[0];
    my $save_as           = $config_dir->file( $dist_version_name . '.json' );
    $save_as->spew($config);
    print "Written: $save_as\n";
}
# Look up the releases of $dist_name (file-level variable) through the
# client returned by _es().
#
# Requests at most 1000 hits, sorted by date descending, restricted to the
# listed fields. Returns the arrayref of hits from the search response.
sub _fetch_dist_list {
    my @wanted_fields = qw( download_url name author abstract version version_numified
        status maturity authorized distribution);

    # Match everything, then narrow to the one distribution with a filter.
    my %release_query = (
        filtered => {
            query  => { match_all => {} },
            filter => { term      => { distribution => $dist_name } },
        },
    );

    my %search_body = (
        query  => \%release_query,
        size   => 1000,
        sort   => [ { date => 'desc' } ],
        fields => \@wanted_fields,
    );

    my $client   = _es();
    my $response = $client->search(
        index => 'v1',
        type  => 'release',
        body  => \%search_body,
    );

    return $response->{hits}->{hits};
}
# Construct the Search::Elasticsearch client used for release lookups,
# pointed at the api-v1.metacpan.org node.
sub _es {
    my %client_args = (
        cxn_pool => 'Static::NoPing',
        nodes    => 'https://api-v1.metacpan.org/',

        # trace_to => 'Stdout',
        send_get_body_as => 'POST',
    );

    return Search::Elasticsearch->new(%client_args);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment