Skip to content

Instantly share code, notes, and snippets.

@fwolf
Last active September 24, 2023 14:33
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save fwolf/901965eee498ea27d6c31bf4d04660cb to your computer and use it in GitHub Desktop.
Uses mcurl.sh to download a Pornhub video.
<?php
// Download a Pornhub video with mcurl.sh, naming the file from the page title.
//
// Usage: php <this-script> PORNHUB_URL
// Requires mcurl.sh in the current working directory.
if (2 > $argc) {
    $basename = basename(__FILE__);
    echo "
Usage: $basename PORNHUB_URL
Need mcurl.sh to download video.
Notice: Cannot run on Mac OSX due to mcurl.sh compatible.
" . PHP_EOL;
    exit(1); // BUG FIX: error paths previously exited with status 0
}

$url = $argv[1];

// Fetch the page with a desktop-browser User-Agent; the default PHP agent
// string is commonly blocked.
$opts = ['http' => [
'header' => 'User-Agent:Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
]];
$context = stream_context_create($opts);
$html = file_get_contents($url, false, $context);
if (empty($html)) {
    echo "Retrieve url fail" . PHP_EOL;
    exit(1);
}

// Extract the viewkey from the query string; it becomes part of the output
// file name so downloads stay unique.
$urlKey = strstr($url, '?', false);
$urlKey = str_replace('?viewkey=', '', $urlKey);

$matches = [];
preg_match('/"quality":"([^"]+)","videoUrl":"([^"]+)"/', $html, $matches);
if (empty($matches)) {
    echo "Video url not found" . PHP_EOL;
    exit(1);
}
$videoUrl = stripslashes($matches[2]);

// The name curl/mcurl will save under: last path segment, query stripped.
$oldFileName = strstr($videoUrl, '?', true);
$oldFileName = substr(strrchr($oldFileName, '/'), 1);

preg_match('/<title>([^<]+)<\/title>/', $html, $matches);
if (empty($matches)) {
    echo "Title not found" . PHP_EOL;
    exit(1);
}
$title = $matches[1];
$title = str_replace('- Pornhub.com', '', $title);
$title = trim($title, " .");
// BUG FIX: was str_replace('/', '/', $title) — a no-op that left path
// separators in the filename; map them to '-' instead.
$title = str_replace('/', '-', $title);
// BUG FIX: the original pattern had no valid delimiters and an unbalanced
// group (\(\]); strip every character unsafe for a file name.
$newFileName = preg_replace('/[^\w\s.\-_~,;:\[\]()]/', '', $title) .
    ".{$urlKey}.mp4";

// SECURITY FIX: quote the URL for the shell — the video URL comes from the
// fetched page and must not be interpolated into a command unescaped.
$downCmd = "./mcurl.sh -u " . escapeshellarg($videoUrl);
system($downCmd);
if (!is_readable($oldFileName)) {
    echo "Download fail" . PHP_EOL;
    exit(1);
}

echo PHP_EOL;
echo PHP_EOL;
echo "Rename $oldFileName --> $newFileName" . PHP_EOL;
rename($oldFileName, $newFileName);
echo PHP_EOL;
#!/bin/bash
#
# Simulate multiple threads downloading by forking many process
# Copyright 2016 Wanghong Lin
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Changelog
# v0.1 initial version
# v0.1.1 add output option
# Number of slices (parallel range requests) the download is split into.
slices=20
# URL to download; set via -u and required.
url=
# Output file name; set via -o, otherwise guessed from the URL.
output=
__ScriptVersion="v0.1.1"
#=== FUNCTION ================================================================
# NAME: usage
# DESCRIPTION: Display usage information.
#===============================================================================
#=== FUNCTION ================================================================
# NAME: usage
# DESCRIPTION: Print the option summary to stdout ($0 is the script path).
#===============================================================================
function usage ()
{
cat <<EOF
Usage : $0 [options]
Options:
-h|help Display this message
-v|version Display script version
-u|url The URL to download
-s|slice How many slices the download task will split, default is 20
-o|output Specify the output file name, use the guessing file name from url as output file name if not specify this option
EOF
} # ---------- end of function usage ----------
#-----------------------------------------------------------------------
# Handle command line arguments
#-----------------------------------------------------------------------
# Parse command-line options. Leading ':' in the optstring enables silent
# error reporting: unknown option -> opt='?', missing argument -> opt=':'.
while getopts ":hvu:s:o:" opt
do
case "$opt" in
h ) usage; exit 0 ;;
v ) echo "Multi tasks downloader for curl, version $__ScriptVersion"; exit 0 ;;
u ) url=$OPTARG ;;
s ) slices=$OPTARG ;;
o ) output=$OPTARG ;;
# BUG FIX: a missing option argument was previously reported as an
# unknown option; getopts signals it with ':' and the flag in OPTARG.
: ) echo -e "\n Option -$OPTARG requires an argument\n"
usage; exit 1 ;;
* ) echo -e "\n Option does not exist : $OPTARG\n"
usage; exit 1 ;;
esac # --- end of case ---
done
shift $((OPTIND-1))
# BUG FIX: quote $url — the unquoted test mis-parses when the value
# contains spaces (and relied on implicit behavior when empty).
[ -z "$url" ] && { usage; exit 1; }
# Guess the output file name from the URL: last path segment, query stripped.
path=${url##*/}
file_to_save=${path%\?*}
# BUG FIX: was `[ x$output != x ]`, which breaks when $output contains spaces.
[ -n "$output" ] && file_to_save=$output
echo "Download $url to $file_to_save with $slices tasks."
# Total size from the response headers. Match the header name
# case-insensitively (HTTP/2 sends lowercase) and strip CR/whitespace;
# the original unquoted `tr -d [[:space:]]` also deleted '[' and ']'.
size_in_byte=$(curl -I "$url" 2>/dev/null | sed -n 's/^[Cc]ontent-[Ll]ength:\(.*\)/\1/p' | tr -d '[:space:]')
# BUG FIX: guard before arithmetic — an empty/non-numeric size previously
# caused a cryptic arithmetic syntax error further down.
case $size_in_byte in
''|*[!0-9]*) echo "Cannot determine content length for $url" >&2; exit 1 ;;
esac
size_per_slice=$((size_in_byte / slices))
total_slice=$((slices + 1))
finished_slice=0
is_finished=0
#=== FUNCTION ================================================================
# NAME: callback
# DESCRIPTION: SIGUSR1 handler; run() signals the parent after each slice
# finishes. When all slices are done, stitch the numbered part
# files ($$.1 .. $$.N) together in order and clean them up.
#===============================================================================
function callback()
{
# Count live children of this shell. NOTE(review): a count of 1 is taken to
# mean "only the subshell delivering this final signal remains", i.e. every
# download has completed — presumes no other child processes exist.
subp=$(pgrep -P $$ | wc -l)
if [ $subp -eq 1 ];then
# Concatenate parts in slice order so byte ranges line up.
for s in `seq $total_slice`
do
cat $$.$s >> "${file_to_save}"
rm $$.$s
done
# Signals the progress loop below to stop polling.
is_finished=1
fi
}
#=== FUNCTION ================================================================
# NAME: run
# DESCRIPTION: Download one byte range in the background.
# ARGUMENTS: $1 part file name, $2 range start, $3 range end
# (empty end => open-ended range "start-", i.e. to EOF).
#===============================================================================
function run()
{
# BUG FIX: quote all expansions — unquoted $url/$1/$2-$3 break on URLs or
# paths containing spaces or shell metacharacters. On success, notify the
# parent shell with SIGUSR1 (signal 10) so callback() can run.
curl -r "$2-$3" "$url" -o "$1" 2>/dev/null && kill -n 10 $$ &
}
# Route SIGUSR1 (signal 10), raised by run() after each slice, to callback.
trap callback 10
start_time=$(date +%s)
# Launch one background download per slice, each covering a distinct range.
for s in `seq $total_slice`
do
# Range start for slice s; +1 for every slice but the first so consecutive
# ranges do not overlap on their boundary byte.
begin=$((($s-1)*${size_per_slice}))
if [ $begin -ne 0 ];then
begin=$((begin+=1))
fi
end=$(($s*$size_per_slice))
# The extra (N+1)-th slice would extend past the reported size; an empty
# end makes the curl range open-ended ("begin-"), fetching the remainder.
if [ $end -gt $size_in_byte ];then
end=
fi
run $$.$s $begin $end
done
# Poll once per second until callback() flips is_finished after the merge.
until [ $is_finished -eq 1 ]
do
if [ -f $$.1 ];then
# Sum part-file sizes in bytes, convert to KiB for the speed readout.
# NOTE(review): `du -b` is GNU-only — this fails on macOS/BSD du.
total_kb=$(du -b $$.* | awk '{t+=$1}END{printf "%d", t/1024}')
duration=$((`date +%s`-$start_time))
[ $duration -gt 0 ] && printf "\rCurrent average speed %4dKiB/s" $(($total_kb/$duration))
fi
sleep 1
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment