Last active
November 10, 2019 21:47
-
-
Save jaksm/96b2dde8b8116835240d2e4409833759 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import * as _ from 'lodash'; | |
import { input } from './input'; | |
type URLTable = URLTableEntry[]; | |
interface URLTableEntry { | |
url: string; | |
keywords: string[]; | |
} | |
const uniqueUrls: string[] = _.uniq(_.flattenDeep(input.map(({ urls }) => urls))); | |
console.log('Input length:', _.flattenDeep(input.map(({ urls }) => urls)).length); | |
console.log('Uniq URLS length:', uniqueUrls.length); | |
// Determine, for each keyword, associated URLs | |
const URLTable: URLTable = uniqueUrls | |
.map(url => ({ | |
url, | |
keywords: input.filter(({ urls }) => urls.includes(url)).map(({ keyword }) => keyword), | |
})) | |
.filter(({ keywords }) => keywords.length >= 3); // Only keep keywords which have at least 3 URLs | |
// console.log('URL Table', JSON.stringify(URLTable, null, 2)); | |
// Generate kept keywords (candidates) | |
const candidatesList = _.flattenDeep(URLTable.map(({ keywords }) => keywords)); | |
const candidates = input.filter(({ keyword }) => candidatesList.includes(keyword)); | |
console.log('Kept keywords diff:', input.length - candidates.length); // must be positive | |
// Generate all combinations (pairs) among kept keywords (candidates) | |
const combinations = []; | |
for (let i = 0; i < candidates.length - 1; i++) { | |
// This is where you'll capture that last value | |
for (let j = i + 1; j < candidates.length; j++) { | |
// @ts-ignore | |
const urlsInCommon = _.intersection(candidates[i].urls, candidates[j].urls).length; | |
combinations.push({ pair: [candidates[i], candidates[j]], urlsInCommon }); | |
} | |
} | |
// Determine for each such combination URLs in common | |
// Only keep combos with at least 3 URLs in common | |
const validCombinations = _.reverse( | |
_.sortBy(combinations.filter(({ urlsInCommon }) => urlsInCommon >= 3), 'urlsInCommon'), | |
); | |
// console.log(validCombinations); | |
// COST FUNCTION: When a keyword is present in more than one combo, choose the one with more urlsInCommon | |
// PROBLEM STARTS HERE | |
const costIteration1 = candidatesList.reduce((acc, curr, i) => { | |
const possiblePairs = validCombinations.filter(({ pair }) => | |
pair.map(({ keyword }) => keyword).includes(curr), | |
); | |
const chosenPair = _.reverse(_.sortBy(possiblePairs, 'urlsInCommon'))[0]; | |
return [chosenPair, ...acc]; | |
}, []); | |
console.log('Iteration #1 diff:', validCombinations.length - costIteration1.length); | |
console.log(costIteration1); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey Jaksa,
A dev from GitHub linked me this. I don't believe it's supposed to public?