Skip to content

Instantly share code, notes, and snippets.

@Coldblackice
Forked from olejorgenb/places-exploration.sql
Created April 20, 2023 15:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Coldblackice/5ee2f4111b0ce1f438062d9a49f113e2 to your computer and use it in GitHub Desktop.
Save Coldblackice/5ee2f4111b0ce1f438062d9a49f113e2 to your computer and use it in GitHub Desktop.
Firefox places.sqlite exploration
--- Firefox places.sqlite exploration
-- https://developer.mozilla.org/en-US/docs/Mozilla/Tech/Places/Database
-- https://wiki.mozilla.org/images/d/d5/Places.sqlite.schema3.pdf
-- http://forensicswiki.org/wiki/Mozilla_Firefox_3_History_File_Format (probably somewhat outdated)
-- [select text -> right click -> search] does not set from_visit :(
--- Magic numbers
-- moz_historyvisits.visit_type:
-- https://dxr.mozilla.org/mozilla-esr60/source/toolkit/components/places/nsINavHistoryService.idl#1185
-- 1 TRANSITION_LINK The user followed a link and got a new toplevel window.
-- 2 TRANSITION_TYPED The user typed the page's URL in the URL bar or selected it from URL bar autocomplete results, clicked on it from a history query (from the History sidebar, History menu, or history query in the personal toolbar or Places organizer.
-- 3 TRANSITION_BOOKMARK The user followed a bookmark to get to the page.
-- 4 TRANSITION_EMBED Set when some inner content is loaded. This is true of all images on a page, and the contents of the iframe. It is also true of any content in a frame, regardless of whether or not the user clicked something to get there.
-- 5 TRANSITION_REDIRECT_PERMANENT The transition was a permanent redirect.
-- 6 TRANSITION_REDIRECT_TEMPORARY The transition was a temporary redirect.
-- 7 TRANSITION_DOWNLOAD The transition is a download.
-- 8 TRANSITION_FRAMED_LINK The user followed a link and got a visit in a frame.
-- 9 TRANSITION_RELOAD The page has been reloaded.
-- Cookies
select value from moz_cookies c
where c.name == "sessionToken" and c.host like "%.riddles.io"
-- Most visited hosts (the host name is reversed :D)
select p.rev_host, count(*)
from moz_places p
join moz_historyvisits v on v.place_id = p.id
group by p.rev_host order by count(*) desc;
-- Most visted places
select * from moz_places order by visit_count desc;
-- First ever visit
select date(min(visit_date)/1000000 , 'unixepoch') from moz_historyvisits;
-- Misc stats
select
((select count(*) from moz_places)+(select count(*) from moz_historyvisits)) as total,
(select count(*) from moz_places) as places,
(select count(*) from moz_historyvisits) as visits,
(select count(*) from moz_inputhistory) as inputs,
(select count(distinct rev_host) from moz_places) as hosts
;
-- Visits grouped by type
select visit_type, count(1) from moz_historyvisits group by visit_type;
-- Visits without a source (grouped by type)
select visit_type, count(*) from moz_historyvisits where from_visit = 0 group by visit_type;
-- Urls visited from a google search (or.. a google domain)
select count(pb.id)
from moz_historyvisits a
join moz_historyvisits b on a.id = b.from_visit
join moz_places pa on a.place_id = pa.id
join moz_places pb on b.place_id = pb.id
where pa.url like "%google.com%" and pb.url not like "%google%"
;
-- Approx google searches
select count(*) from moz_places where url like "%google%/search?%";
-- Approx duckduckgo searches
select count(*) from moz_places where url like "%duckduckgo%/?q=%";
-- Visists to a url
select datetime(hv.visit_date/1000000 , 'unixepoch') as visit_date
from moz_historyvisits hv join moz_places p on p.id = hv.place_id
where p.url = "$URL"
;
-- Number of outgoing visits from a single history vist (NB: not counting redirects..)
select p.url, a.id, sum(b.visit_type not in (5,6)) as children
from moz_historyvisits a join moz_historyvisits b on a.id = b.from_visit
join moz_places p on p.id = a.place_id
group by a.id, p.id
order by children desc
;
-- Number of outgoing visits from a place (NB: not counting redirects..)
select p.url,
sum(b.visit_type not in (5,6)) as children,
sum(b.visit_type in (5,6)) as redirects
from moz_historyvisits a join moz_historyvisits b on a.id = b.from_visit
join moz_places p on p.id = a.place_id
group by p.id
order by children desc
;
-- Where did I find this url? (No matches means typed in or external source)
select from_place.id, from_place.url
from
moz_places as this_place,
moz_historyvisits as hv,
moz_historyvisits as hv_from,
moz_places as from_place
where
this_place.url like "%$URL%" and
this_place.id = hv.place_id and
hv.from_visit - 3 = hv_from.id and
hv_from.place_id = from_place.id
-- and from_place.rev_host <> this_place.rev_host -- optional filter
;
-- Dangling visits..
select count(1) from moz_historyvisits a left join moz_historyvisits b on a.from_visit = b.id
where b.id is null and a.from_visit > 0
;
-- Find all places visited less than N "hoops" away from a host
with recursive
host_visits(id) as
(select hv.id
from moz_places p
join moz_historyvisits hv on hv.place_id = p.id
where p.rev_host like "moc.rotanibmocy.swen%")
,
host_reachable(id, dist) as
(select *, 0 as dist from host_visits
union
select hv.id, (case when hv.visit_type in (5,6) then rr.dist else rr.dist+1 end) as dist
from host_reachable rr join moz_historyvisits hv on rr.id = hv.from_visit)
select
p.url, p.title,
datetime(max(hv.visit_date)/1000000 , 'unixepoch') as latest_visit_date,
count(hv.id), min(rr.dist)
from host_reachable rr
join moz_historyvisits hv on hv.id = rr.id
join moz_places p on p.id = hv.place_id
where
p.rev_host not like "moc.rotanibmocy.swen%"
-- and hv.visit_type not in (5,6)
and rr.dist < 20
group by p.url, p.title
order by latest_visit_date desc
;
-- Not sure what this is - long time ago
with recursive
leaf_path(leaf_id, id) as
(select leaf.id, leaf.id from moz_historyvisits leaf
left join moz_historyvisits child on child.from_visit = leaf.id
where child.id is null
union all
select lp.leaf_id, v.from_visit as id
from leaf_path lp join moz_historyvisits v on v.id = lp.id)
,
path_length(leaf_id, length) as
(select leaf_id, count(*) as length
from leaf_path lp join moz_historyvisits hv on hv.id = lp.id
where hv.visit_type not in (5, 6) -- ignore redirects
group by leaf_id)
,
root(leaf_id, root_id) as
(select leaf_id, min(id) as root_id from leaf_path)
,
distinct_hosts(leaf_id, count) as
(select leaf_id, count(distinct p.rev_host) as count
from leaf_path lp
join moz_historyvisits hv on hv.id = lp.id
join moz_places p on hv.place_id = p.id
where hv.visit_type not in (5, 6) -- ignore redirects
group by lp.leaf_id)
select lp.leaf_id, lp.id, hv.visit_type, p.url
from leaf_path lp
join moz_historyvisits hv on hv.id = lp.id
join moz_places p on p.id = hv.place_id
join path_length pl on pl.leaf_id = lp.leaf_id
join distinct_hosts dh on dh.leaf_id = lp.leaf_id
where dh.count > 6 and pl.length > 3
order by pl.length desc, lp.leaf_id, lp.id;
-- lp.leaf_id = (select leaf_id from (select leaf_id from path_length order by length desc limit 1 offset 100))
select * from moz_inputhistory order by length(input) desc;
select LENGTH(url), count(1) from moz_places group by LENGTH(url);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment