-
-
Save huksley/bc3cb046157a99cd9d1517b32f91a99e to your computer and use it in GitHub Desktop.
/**
 * This magically uses batchexecute protocol. It's not documented, but it works.
 *
 * Licensed under: MIT License
 *
 * Copyright (c) 2024 Ruslan Gainutdinov
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
const fetchDecodedBatchExecute = (id: string) => { | |
const s = | |
'[[["Fbv4je","[\\"garturlreq\\",[[\\"en-US\\",\\"US\\",[\\"FINANCE_TOP_INDICES\\",\\"WEB_TEST_1_0_0\\"],null,null,1,1,\\"US:en\\",null,180,null,null,null,null,null,0,null,null,[1608992183,723341000]],\\"en-US\\",\\"US\\",1,[2,3,4,8],1,0,\\"655000234\\",0,0,null,0],\\"' + | |
id + | |
'\\"]",null,"generic"]]]'; | |
return fetch("https://news.google.com/_/DotsSplashUi/data/batchexecute?" + "rpcids=Fbv4je", { | |
headers: { | |
"Content-Type": "application/x-www-form-urlencoded;charset=utf-8", | |
Referrer: "https://news.google.com/" | |
}, | |
body: "f.req=" + encodeURIComponent(s), | |
method: "POST" | |
}) | |
.then(e => e.text()) | |
.then(s => { | |
const header = '[\\"garturlres\\",\\"'; | |
const footer = '\\",'; | |
if (!s.includes(header)) { | |
throw new Error("header not found: " + s); | |
} | |
const start = s.substring(s.indexOf(header) + header.length); | |
if (!start.includes(footer)) { | |
throw new Error("footer not found"); | |
} | |
const url = start.substring(0, start.indexOf(footer)); | |
return url; | |
}); | |
}; | |
/**
 * Google News started generating encoded, internal URLs for RSS items
 * https://news.google.com/rss/search?q=New%20York%20when%3A30d&hl=en-US&gl=US&ceid=US:en
 *
 * This script decodes URLs into the original ones, for example the URL
 * https://news.google.com/__i/rss/rd/articles/CBMiSGh0dHBzOi8vdGVjaGNydW5jaC5jb20vMjAyMi8xMC8yNy9uZXcteW9yay1wb3N0LWhhY2tlZC1vZmZlbnNpdmUtdHdlZXRzL9IBAA?oc=5
 *
 * contains this
 * https://techcrunch.com/2022/10/27/new-york-post-hacked-offensive-tweets/
 *
 * In the path after articles/ goes Base64 encoded binary data
 *
 * Format is the following:
 * <prefix> <len bytes> <URL bytes> <len bytes> <amp URL bytes> [<suffix>]
 *
 * <prefix> - 0x08, 0x13, 0x22
 * <suffix> - 0xd2, 0x01, 0x00 (sometimes missing??)
 * <len bytes> - formatted as 0x40 or 0x81 0x01 sometimes
 *
 *
 * https://news.google.com/rss/articles/CBMiqwFBVV95cUxNMTRqdUZpNl9hQldXbGo2YVVLOGFQdkFLYldlMUxUVlNEaElsYjRRODVUMkF3R1RYdWxvT1NoVzdUYS0xSHg3eVdpTjdVODQ5cVJJLWt4dk9vZFBScVp2ZmpzQXZZRy1ncDM5c2tRbXBVVHVrQnpmMGVrQXNkQVItV3h4dVQ1V1BTbjhnM3k2ZUdPdnhVOFk1NmllNTZkdGJTbW9NX0k5U3E2Tkk?oc=5
 * https://news.google.com/rss/articles/CBMidkFVX3lxTFB1QmFsSi1Zc3dLQkpNLThKTXExWXBGWlE0eERJQ2hLRENIOFJzRTlsRnM1NS1Hc2FlbjdIMlZ3eWNQa0JqeVYzZGs1Y0hKaUtTUko2dmJabUtVMWZob0lNSFNCa3NLQ05ROGh4cVZfVTYyUDVxc2c?oc=5
 * https://news.google.com/rss/articles/CBMiqwFBVV95cUxNMTRqdUZpNl9hQldXbGo2YVVLOGFQdkFLYldlMUxUVlNEaElsYjRRODVUMkF3R1RYdWxvT1NoVzdUYS0xSHg3eVdpTjdVODQ5cVJJLWt4dk9vZFBScVp2ZmpzQXZZRy1ncDM5c2tRbXBVVHVrQnpmMGVrQXNkQVItV3h4dVQ1V1BTbjhnM3k2ZUdPdnhVOFk1NmllNTZkdGJTbW9NX0k5U3E2Tkk?oc=5
 *
 * FIXME: What will happen if URL more than 255 bytes??
 *
 * Licensed under: MIT License
 *
 * Copyright (c) 2022 Ruslan Gainutdinov
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
export const decodeGoogleNewsUrl = async (sourceUrl: string) => { | |
const url = new URL(sourceUrl); | |
const path = url.pathname.split("/"); | |
if ( | |
url.hostname === "news.google.com" && | |
path.length > 1 && | |
path[path.length - 2] === "articles" | |
) { | |
const base64 = path[path.length - 1]; | |
let str = atob(base64); | |
const prefix = Buffer.from([0x08, 0x13, 0x22]).toString("binary"); | |
if (str.startsWith(prefix)) { | |
str = str.substring(prefix.length); | |
} | |
const suffix = Buffer.from([0xd2, 0x01, 0x00]).toString("binary"); | |
if (str.endsWith(suffix)) { | |
str = str.substring(0, str.length - suffix.length); | |
} | |
// One or two bytes to skip | |
const bytes = Uint8Array.from(str, c => c.charCodeAt(0)); | |
const len = bytes.at(0)!; | |
if (len >= 0x80) { | |
str = str.substring(2, len + 2); | |
} else { | |
str = str.substring(1, len + 1); | |
} | |
if (str.startsWith("AU_yqL")) { | |
// New style encoding, introduced in July 2024. Not yet known how to decode offline. | |
const url = await fetchDecodedBatchExecute(base64); | |
return url; | |
} | |
return str; | |
} else { | |
return sourceUrl; | |
} | |
}; |
The old method is broken again
Can confirm... broken again.
@huksley old method is not working....
Here is another option
https://www.bing.com/news/search?q=wordpress&format=rss&count=24
Url is extractable
I will be switching over.
It does not give you a max of 100 results, but thats okay as that is normally older less interesting news sometimes. and most people will not scroll that far.
I would rather have 24 trending stories than 100 random ones over the last 30 days or so.
https://www.bing.com/news/search?q=wordpress&format=rss&count=24
A lot of the articles don't have meta images to scrape though. Kinda circles back to the same problem for me... no preview images
Multi-threaded curl to grab the og:image off of the site with the article in the background
Claude AI will show you how to write it, or I can provide you some PHP code
I wrote MS asking for permission to use their feed. Still waiting for a reply.
Multi-threaded curl to grab the og:image off of the site with the article in the background. Claude AI will show you how to write it, or I can provide you some PHP code
What I'm saying is many of those Bing articles don't have og:images.
'\u003d' should be replaced with '='
url = url.split('\u003d').join('=');
Do you have any ideas or solutions?
Currently, the old method is completely broken.
How do we get direct links to sources in Google News?
@maks-outsource New method involved fetching and decoding on the server side, the gist have been updated to that. Is it working for you?
I have been trying to get inspired from this new version to adapt my N8N workflow but without success
https://community.n8n.io/t/solved-base64-decode-google-news-urls-with-a-function-node/
@benborges n8n handles custom JS just fine, you just need to make sure that inputs and outputs for the N8N custom code node are the correct for the script.
@benborges n8n handles custom JS just fine, you just need to make sure that inputs and outputs for the N8N custom code node are the correct for the script.
I know, my previous workflow used to work just fine, but I can't really integrate yours, i'm confused about how to adapt it to my n8n workflow https://community.n8n.io/t/solved-base64-decode-google-news-urls-with-a-function-node/29019/9?u=benb
It works but in many cases, requests to Google return 302 and a redirect to the Google sorry page with "Unusual traffic coming from your network"
I hope we can find a way to decode them offline
It works but in many cases, requests to Google return 302 and a redirect to the Google sorry page with "Unusual traffic coming from your network"
I hope we can find a way to decode them offline
Yeah offline would be great but I don't think that's going to happen.
I'm getting a 400 when trying to use the batchexecute API — can someone take a peek and see if something jumps out as wrong?
REQUEST: https://news.google.com/_/DotsSplashUi/data/batchexecute?rpcids=Fbv4je
METHOD: HttpMethod(value=POST)
COMMON HEADERS
-> Accept: /
-> Accept-Charset: UTF-8
CONTENT HEADERS
-> Content-Length: 476
-> Content-Type: text/plain; charset=UTF-8
BODY Content-Type: text/plain; charset=UTF-8
BODY START
f.req=[[["Fbv4je","[\"garturlreq\",[[\"en-US\",\"US\",[\"FINANCE_TOP_INDICES\",\"WEB_TEST_1_0_0\"],null,null,1,1,\"US:en\",null,180,null,null,null,null,null,0,null,null,[1608992183,723341000]],\"en-US\",\"US\",1,[2,3,4,8],1,0,\"655000234\",0,0,null,0],\"CBMiggFBVV95cUxPSlIzYnZUbVFZakd5RXZGOER3dmZwOUZJV1ZOREI1WEliVlJpU011MWdoWDRzOGNVblF0V3FLd2w4ak9jenZXS0d4ZHBhSUswTmZfOHJ4Tk5DbFF0UFlmY1YzQWVWb2FMYXp6SmJRYUI4eXJmczZHcGVtMmhyYm9BX0FR\"]",null,"generic"]]]
BODY END
Thanks @huksley, I've converted your script to a Python package. If anyone is looking for Python, they can find it [Google News Decoder Python]. If you update it, I'll try to update the package. Thank you so much.
Can someone explain what changed in Google News redirect URLs in simple terms ? When base64
decoded, I get output that starts with AU_yq
as opposed to just getting a URL. Currently, I am using @SSujitX 's package and can decode currently, but I am curious.
@ruthvik92 as best as i can guess - they switched from secretly-ish encoding the canonical article URLs inside the returned google news URL, to simply not encoding them inside the URL; i suspect it's now just stored somewhere separately on their servers, and the info in the new URLs is just an identifier pointing to that data. Therefore, to get the original URL you have to make a request to Google servers.
Hard to say the reason - maybe it's because this is unsanctioned use they want to prevent, maybe it's an internal change that happened to affect outside users. Personally for me though, faking sketchy API requests to get the data pushed this across the threshold from "clever hack" to "clearly something they don't want you doing", so I've switched to using and paying for the Bing News API.
This is not an unsanctioned use or something. They have a publicly available RSS feed, and it is supposed to contain an article link, not some tracking, obfuscated URL. If there were some authentication or paid plan for that, it would also be okay, but let's not imagine this is done so users (us) have a better experience.
To address the issue of request limitations with Google's API, I implemented the following solution:
Switched to Bing News API, which offers 1,000 free requests per month—ample for our needs.
Set up a daily cron job that:
Submits 6 predefined search terms to the Bing API
Stores the results in a PostgreSQL database
Modified the user-facing news retrieval process:
News is now pulled from our database instead of making live API calls
This approach eliminates API usage when users access news
While the news may be up to a day old, this trade-off is acceptable given the benefits of reliable access and reduced API dependency.
This solution effectively manages API limitations while ensuring consistent news availability for users.
Here is an example
Here is how I grab API info into the DB
`<?php
function newsgetf($searchterms, $maxnumberarticles)
{
date_default_timezone_set('GMT');
// Database connection parameters
$host = 'localhost';
$dbname = 'bingnews';
$user = '';
$password = '';
// Connect to the PostgreSQL database
$dbconn = pg_connect("host=$host dbname=$dbname user=$user password=$password");
if (!$dbconn) {
die("Connection failed: " . pg_last_error());
}
$subscription_key = "";
$endpoint = "https://api.bing.microsoft.com/v7.0/news/search";
// Prepare the SQL statements
$check_duplicate_sql = "SELECT url FROM news_articles WHERE url = $1";
$upsert_sql = "INSERT INTO news_articles (url, name, date_published, description, provider, og_image, search_terms, insertion_timestamp)
VALUES ($1, $2, $3, $4, $5, $6, ARRAY[$7], $8)
ON CONFLICT (url)
DO UPDATE SET
name = EXCLUDED.name,
date_published = EXCLUDED.date_published,
description = EXCLUDED.description,
provider = EXCLUDED.provider,
og_image = EXCLUDED.og_image,
search_terms = array_append(news_articles.search_terms, $7),
insertion_timestamp = EXCLUDED.insertion_timestamp";
foreach ($searchterms as $searchterm) {
$query = urlencode($searchterm);
$url = $endpoint . "?q=" . $query . "&count=" . $maxnumberarticles . "&mkt=en-US";
$headers = [
"Ocp-Apim-Subscription-Key: $subscription_key"
];
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$response = curl_exec($ch);
if ($response === false) {
echo "Error fetching results for '$searchterm': " . curl_error($ch) . "\n";
continue;
}
curl_close($ch);
$result = json_decode($response, true);
if (isset($result['value'])) {
$values = $result['value'];
// Fetch metadata for each article
$mh = curl_multi_init();
$handles = [];
foreach ($values as $key => $item) {
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $item['url']);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_TIMEOUT, 5);
curl_multi_add_handle($mh, $ch);
$handles[$key] = $ch;
}
$running = null;
do {
curl_multi_exec($mh, $running);
} while ($running);
foreach ($handles as $key => $ch) {
$page_content = curl_multi_getcontent($ch);
if ($page_content !== false && !empty($page_content)) {
$dom_obj = new DOMDocument();
@$dom_obj->loadHTML($page_content, LIBXML_NOWARNING | LIBXML_NOERROR);
foreach ($dom_obj->getElementsByTagName('meta') as $meta) {
if ($meta->getAttribute('property') == 'og:image') {
$values[$key]['og_image'] = $meta->getAttribute('content');
}
if ($meta->getAttribute('property') == 'og:description') {
$values[$key]['og_description'] = $meta->getAttribute('content');
}
}
}
curl_multi_remove_handle($mh, $ch);
curl_close($ch);
}
curl_multi_close($mh);
// Insert or update each article in the database
$inserted_count = 0;
$updated_count = 0;
$duplicate_count = 0;
$error_count = 0;
$current_timestamp = date('Y-m-d H:i:s');
foreach ($values as $item) {
// Check if the article already exists
$check_result = pg_query_params($dbconn, $check_duplicate_sql, [$item['url']]);
if (pg_num_rows($check_result) > 0) {
$duplicate_count++;
continue;
}
$result = pg_query_params($dbconn, $upsert_sql, [
$item['url'],
$item['name'],
$item['datePublished'],
$item['og_description'] ?? $item['description'],
$item['provider'][0]['name'] ?? '',
$item['og_image'] ?? ($item['image']['originalImg'] ?? ($item['image']['thumbnail']['contentUrl'] ?? '')),
$searchterm,
$current_timestamp
]);
if ($result) {
$affected_rows = pg_affected_rows($result);
if ($affected_rows == 1) {
$inserted_count++;
} else {
$updated_count++;
}
} else {
echo "Error inserting/updating article for '$searchterm': " . pg_last_error($dbconn) . "\n";
$error_count++;
}
}
echo "For search term '$searchterm':\n";
echo " Inserted: $inserted_count, Updated: $updated_count, Duplicates: $duplicate_count, Errors: $error_count\n";
} else {
echo "No results found for: $searchterm\n";
}
}
// Close the database connection
pg_close($dbconn);
}
// Example usage
$search_terms = ['CHRISTIAN REVIVAL JESUS', 'PERSECUTED CHRISTIAN JESUS','Cybersecurity','DevOps','Technology advances or AI or machine learning or quantum computing or IoT'];
$max_articles_per_term = 100;
newsgetf($search_terms, $max_articles_per_term);`
Here is how I render what is in the db to html
`<?php
// Global variable to control Pica usage
$USE_PICA = false;
function newsgetf($searchterm, $maxnumberarticles)
{
global $USE_PICA;
date_default_timezone_set('GMT');
// Hardcoded entries
$values = [
[
'name' => 'About - Dallas-Fort Worth Church Eleven 32 - Non-Denominational Church - Allen, TX',
'url' => 'https://churcheleven32.com/about/',
'date_published' => date('D, d M Y H:i:s T'),
'description' => 'Church Eleven32 is a non-denominational church in the Dallas-Fort Worth area. We are dedicated to being a place for all people to know God. Learn more about our church here.',
'provider' => 'Church Eleven32',
'og_image' => 'https://churcheleven32.com/wp-content/uploads/2023/07/30981495541_52ff1465af_b.jpg'
],
[
'name' => 'St Sava Orthodox Church - Allen, TX',
'url' => 'https://stsavaoca.org/',
'date_published' => date('D, d M Y H:i:s T'),
'description' => 'Discover the roots of Christianity with the Orthodox Church, a faith tradition tracing back nearly 2,000 years to Jesus Christ and His Apostles. Orthodox Christianity preserves the original teachings, practices, and sacraments established by the early Church, offering a direct connection to apostolic times. With a rich history spanning continents and cultures, Orthodoxy emphasizes right worship and belief, carefully guarding the truth passed down through Holy Scripture and Sacred Tradition. Experience the depth and authenticity of this ancient yet living faith as you explore the origins of Christian spirituality.',
'provider' => 'St Sava Orthodox Church',
'og_image' => 'https://stsavaoca.org/s/img/wp-content/uploads/2024/03/StSavaChurch.jpg.webp'
]
];
// Database connection parameters
$host = 'localhost';
$dbname = 'bingnews';
$user = '';
$password = '';
// Connect to the PostgreSQL database
$dbconn = pg_connect("host=$host dbname=$dbname user=$user password=$password");
if (!$dbconn) {
die("Connection failed: " . pg_last_error());
}
// Prepare the SQL query
$query = "SELECT * FROM news_articles WHERE $1 = ANY(search_terms) ORDER BY date_published DESC LIMIT $2";
// Execute the query
$result = pg_query_params($dbconn, $query, array($searchterm, $maxnumberarticles));
if (!$result) {
echo "<h1>Error fetching results: " . pg_last_error($dbconn) . "</h1>";
pg_close($dbconn);
return;
}
$db_values = pg_fetch_all($result);
// Merge hardcoded entries with database results
if ($db_values) {
$values = array_merge($values, $db_values);
}
if ($values) {
echo "<h1><i class=\"fas fa-newspaper\" style=\"color: #3498db; margin-right: 10px;\"></i> Found " . count($values) . " News Articles for \"" . htmlspecialchars($searchterm, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8') . "\"</h1>";
echo "<div class=\"posts__container\">";
foreach ($values as $key => $item) {
$protocol = "https://";
$CurPageURL = $protocol . $_SERVER['HTTP_HOST'] . $_SERVER['REQUEST_URI'];
echo "
<script type=\"application/ld+json\">
{
\"@context\": \"https://schema.org/\",
\"@type\": \"NewsArticle\",
\"mainEntityOfPage\": {
\"@type\": \"WebPage\",
\"@id\": \"" . htmlspecialchars($CurPageURL, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8') . "\"
},
\"headline\": \"" . trim(htmlspecialchars($item['name'], ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8', false)) . "\",
\"description\": \"" . trim(htmlspecialchars($item['description'], ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8', false)) . "\",
\"image\": \"" . htmlspecialchars($item['og_image'] ?? '', ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8') . "\",
\"author\": {
\"@type\": \"Organization\",
\"name\": \"" . htmlspecialchars($item['provider'], ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8') . "\"
},
\"publisher\": {
\"@type\": \"Organization\",
\"name\": \"" . htmlspecialchars($item['provider'], ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8') . "\",
\"logo\": {
\"@type\": \"ImageObject\",
\"url\": \"\"
}
},
\"datePublished\": \"" . date("Y-m-d", strtotime($item['date_published'])) . "\"
}
</script>";
echo "<div class=\"div__post\">";
echo "<div class=\"time\"><time datetime=\"" . date("Y-m-d", strtotime($item['date_published'])) . "T" . date("H:i:s", strtotime($item['date_published'])) . "\">" . htmlspecialchars($item['date_published'], ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8') . " [#" . ($key + 1) . "]" . "</time></div>";
$image_url = $item['og_image'] ?? '';
if ($image_url) {
echo "<div class=\"center\"><img loading=\"lazy\" src=\"" . htmlspecialchars($image_url, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8') . "\" class=\"pica-resize\" style=\"width: 100%; height: auto;\" onerror=\"this.style.display='none'\" alt=\"" . trim(htmlspecialchars($item['name'], ENT_NOQUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8', false)) . "\"/></div>";
}
echo "<a href=\"" . htmlspecialchars($item['url'], ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8') . "\" target=\"_blank\" class=\"underline-on-hover\">" . htmlspecialchars($item['name'], ENT_NOQUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8', false) . "</a>";
echo "<div class=\"description__block\">" . htmlspecialchars($item['description'], ENT_NOQUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8', false) . "<br /></div></div>";
}
echo "</div>";
// Add Pica library and custom JavaScript
echo "
<script src=\"https://cdnjs.cloudflare.com/ajax/libs/pica/9.0.1/pica.min.js\"></script>
<script>
document.addEventListener('DOMContentLoaded', function() {
const usePica = " . ($USE_PICA ? 'true' : 'false') . ";
if (!usePica) {
document.querySelectorAll('img.pica-resize').forEach(img => {
img.style.width = '100%';
img.style.height = 'auto';
});
return;
}
const pica = window.pica({
features: ['js', 'wasm', 'ww']
});
const resizeImage = (img) => {
const container = img.parentElement;
const containerWidth = container.clientWidth;
// Create a temporary image to get the natural aspect ratio
const tempImg = new Image();
tempImg.src = img.src;
tempImg.onload = () => {
const aspectRatio = tempImg.naturalWidth / tempImg.naturalHeight;
const targetHeight = containerWidth / aspectRatio;
const canvas = document.createElement('canvas');
canvas.width = containerWidth;
canvas.height = targetHeight;
pica.resize(tempImg, canvas, {
quality: 3,
alpha: true,
unsharpAmount: 80,
unsharpRadius: 0.6,
unsharpThreshold: 2
})
.then(result => pica.toBlob(result, 'image/png', 1.0))
.then(blob => {
const resizedUrl = URL.createObjectURL(blob);
img.src = resizedUrl;
img.style.width = '100%';
img.style.height = 'auto';
})
.catch(err => {
console.error('Pica error:', err);
img.style.width = '100%';
img.style.height = 'auto';
});
};
};
const observer = new ResizeObserver(entries => {
entries.forEach(entry => {
const img = entry.target.querySelector('img.pica-resize');
if (img) {
resizeImage(img);
}
});
});
document.querySelectorAll('.center').forEach(container => {
const img = container.querySelector('img.pica-resize');
if (img) {
observer.observe(container);
img.addEventListener('load', () => resizeImage(img));
}
});
});
</script>";
echo "<hr/><div class=\"center\"><b><i><a href=\"https://www.biblegateway.com/passage/?search=1+John+4%3A7-21&version=NIV\" target=\"_blank\">God Is Love - 1 John 4:7-21</a></i></b></div></div></body></html>";
} else {
echo "<h1>No results found for: " . htmlspecialchars($searchterm, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8') . "</h1>";
}
// Close the database connection
pg_close($dbconn);
}
// Example usage
// $USE_PICA = true; // Set this to false to disable Pica image resizing
// $searchterm = 'CHRISTIAN REVIVAL JESUS';
// $max_articles = 10;
// newsgetf($searchterm, $max_articles);
?>`
Here is the sql for the tables of the db
`-- Table: public.news_articles
-- DROP TABLE IF EXISTS public.news_articles;
CREATE TABLE IF NOT EXISTS public.news_articles
(
id integer NOT NULL DEFAULT nextval('news_articles_id_seq'::regclass),
url text COLLATE pg_catalog."default" NOT NULL,
name text COLLATE pg_catalog."default" NOT NULL,
date_published timestamp without time zone NOT NULL,
description text COLLATE pg_catalog."default",
provider character varying(255) COLLATE pg_catalog."default",
og_image text COLLATE pg_catalog."default",
search_terms text[] COLLATE pg_catalog."default",
insertion_timestamp timestamp without time zone NOT NULL DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT news_articles_pkey PRIMARY KEY (id),
CONSTRAINT news_articles_url_key UNIQUE (url)
)
TABLESPACE pg_default;
ALTER TABLE IF EXISTS public.news_articles
OWNER to postgres;
GRANT ALL ON TABLE public.news_articles TO coviduser;
GRANT ALL ON TABLE public.news_articles TO postgres;
-- Index: idx_date_published
-- DROP INDEX IF EXISTS public.idx_date_published;
CREATE INDEX IF NOT EXISTS idx_date_published
ON public.news_articles USING btree
(date_published ASC NULLS LAST)
TABLESPACE pg_default;
-- Index: idx_insertion_timestamp
-- DROP INDEX IF EXISTS public.idx_insertion_timestamp;
CREATE INDEX IF NOT EXISTS idx_insertion_timestamp
ON public.news_articles USING btree
(insertion_timestamp ASC NULLS LAST)
TABLESPACE pg_default;
-- Index: idx_search_terms
-- DROP INDEX IF EXISTS public.idx_search_terms;
CREATE INDEX IF NOT EXISTS idx_search_terms
ON public.news_articles USING gin
(search_terms COLLATE pg_catalog."default")
TABLESPACE pg_default;
-- Index: idx_url
-- DROP INDEX IF EXISTS public.idx_url;
CREATE INDEX IF NOT EXISTS idx_url
ON public.news_articles USING btree
(url COLLATE pg_catalog."default" ASC NULLS LAST)
TABLESPACE pg_default;`
It appears that Google may be utilising a combination of Base64 encoding, Protocol Buffers, and AES256 encryption, although I am not entirely certain of this.
To elaborate, when decoding Base64 (URL Safe) into Protocol Buffer using the provided message structure, it occasionally produces one or two hashes that begin with 'AU_yqL'. This prefix could either indicate the start of the hash or a URL, such as https://.
The message structure in Protocol Buffer is defined as follows:
message Article {
optional int32 version = 1;
optional string unknown4 = 4;
optional string unknown26 = 26;
}
Does anyone have insights into whether AES256 encryption is indeed being used in this context, or any guidance on how to decrypt it?
Original Hash: CBMiR0FVX3lxTE5fVEhXSDU2YnFhcDk1bGR6RjUtemduZllBb0QwQ2tHVHprVTc4SzFvU3dkUjJaZFNFZ250eHhxblhkc0ZKMmdV
Hash output: AU_yqLN_THWH56bqap95ldzF5-zgnfYAoD0CkGTzkU78K1oSwdR2ZdSEgntxxqnXdsFJ2gU
Hash normalized: AU/yqLN/THWH56bqap95ldzF5+zgnfYAoD0CkGTzkU78K1oSwdR2ZdSEgntxxqnXdsFJ2gU
What should be the result: http://www.jazzandbeyond.com.au/
Pretty sure this solution just broke
def busted :(
@huksley this request solution is broken
Unfortunately, the solution that @huksley proposed no longer works. It produces errors like this: [["wrb.fr","Fbv4je",null,null,null,[3],"generic"],["di",10],["af.httprm",10,"2111786207358723693",9]]
.
Not working the old method