Skip to content

Instantly share code, notes, and snippets.

@ross-spencer
Last active February 18, 2022 14:28
Show Gist options
  • Save ross-spencer/1f6cf16c9f966b980db277f7a932d81b to your computer and use it in GitHub Desktop.
Save ross-spencer/1f6cf16c9f966b980db277f7a932d81b to your computer and use it in GitHub Desktop.
demystify analysis queries
---
/* demystify: select the format identification tool used
*/
SELECT DBMD.TOOL_TYPE FROM DBMD
Query took: 0.0002048015594482422 seconds
---
/* demystify: select all namespace data
*/
SELECT * FROM NSDATA
Query took: 4.2438507080078125e-05 seconds
---
/* demystify: count the number of namespaces used by an identification report
*/
SELECT COUNT(NSDATA.NS_ID) FROM NSDATA
Query took: 4.029273986816406e-05 seconds
---
/* demystify: select the checksum type used by the database
*/
SELECT DBMD.HASH_TYPE FROM DBMD
Query took: 5.91278076171875e-05 seconds
---
/* demystify: sum the total bytes used by all files in the database
*/
SELECT SUM(FILEDATA.SIZE) FROM FILEDATA
Query took: 0.00011754035949707031 seconds
---
/* demystify: count all files in the database
*/
SELECT COUNT(FILEDATA.FILE_ID) FROM FILEDATA WHERE (FILEDATA.TYPE='File' OR FILEDATA.TYPE='Container')
Query took: 0.00010251998901367188 seconds
---
/* demystify: count the number of container files in the database
*/
SELECT COUNT(FILEDATA.FILE_ID) FROM FILEDATA WHERE FILEDATA.TYPE='Container'
Query took: 7.963180541992188e-05 seconds
---
/* demystify: count the number of files in container objects, e.g. zip
*/
SELECT COUNT(FILEDATA.FILE_ID) FROM FILEDATA WHERE (FILEDATA.URI_SCHEME!='file') AND (FILEDATA.TYPE='File' OR FILEDATA.TYPE='Container')
Query took: 7.891654968261719e-05 seconds
---
/* demystify: count the number of unique identifications
*/
SELECT COUNT(FILEDATA.FILE_ID) FROM FILEDATA WHERE FILEDATA.TYPE='Folder'
Query took: 7.772445678710938e-05 seconds
---
/* demystify: count the number of distinct files in the database
*/
SELECT COUNT(DISTINCT FILEDATA.NAME) FROM FILEDATA WHERE (FILEDATA.TYPE='File' OR FILEDATA.TYPE='Container')
Query took: 0.0002658367156982422 seconds
---
/* demystify: count the number of unique directories in the database
*/
SELECT COUNT(DISTINCT FILEDATA.DIR_NAME) FROM FILEDATA
Query took: 0.00015354156494140625 seconds
---
/* demystify: select all identification results across the database and order by namespace
*/
SELECT IDRESULTS.FILE_ID, IDDATA.ID_ID, IDDATA.METHOD, IDDATA.NS_ID
FROM IDRESULTS
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
ORDER BY
CASE IDDATA.NS_ID
WHEN '1' THEN 1
ELSE 2
END
Query took: 0.0004756450653076172 seconds
---
/* demystify: count of all extension mismatches
*/
SELECT COUNT(distinct(IDRESULTS.FILE_ID))
FROM IDRESULTS
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE IDDATA.EXTENSION_MISMATCH='True'
Query took: 0.0002655982971191406 seconds
---
/* demystify: count of pronom signature or container identification results
*/
SELECT COUNT(DISTINCT IDDATA.ID)
FROM IDRESULTS
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE (NSDATA.NS_NAME='pronom')
AND (IDDATA.METHOD='Signature' OR IDDATA.METHOD='Container')
Query took: 0.0003762245178222656 seconds
---
/* demystify: count of non-pronom signature or container identification results
*/
SELECT COUNT(DISTINCT IDDATA.ID)
FROM IDRESULTS
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE (NSDATA.NS_NAME!='pronom')
AND (IDDATA.METHOD='Signature' OR IDDATA.METHOD='Container')
Query took: 0.00039458274841308594 seconds
---
/* demystify: count all identification results for all identifiers as one
*/
SELECT COUNT(DISTINCT IDMETHOD)
FROM (SELECT IDRESULTS.FILE_ID, IDDATA.ID as IDMETHOD
FROM IDRESULTS
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
AND (IDDATA.METHOD='XML'))
Query took: 0.0002090930938720703 seconds
---
/* demystify: count all identification results for all identifiers as one
*/
SELECT COUNT(DISTINCT IDMETHOD)
FROM (SELECT IDRESULTS.FILE_ID, IDDATA.ID as IDMETHOD
FROM IDRESULTS
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
AND (IDDATA.METHOD='Text'))
Query took: 0.00020956993103027344 seconds
---
/* demystify: count all identification results for all identifiers as one
*/
SELECT COUNT(DISTINCT IDMETHOD)
FROM (SELECT IDRESULTS.FILE_ID, IDDATA.ID as IDMETHOD
FROM IDRESULTS
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
AND (IDDATA.METHOD='Filename'))
Query took: 0.0002110004425048828 seconds
---
/* demystify: select information about namespace and identification based on the given identification method
*/
SELECT 'ns:' || NSDATA.NS_NAME || ' ' || IDDATA.ID, count(IDDATA.ID) as TOTAL
FROM IDDATA
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID
WHERE IDDATA.BASIS LIKE '%xml match%' OR IDDATA.WARNING LIKE '%xml match%'
GROUP BY NSDATA.NS_NAME, IDDATA.iD
ORDER BY TOTAL DESC
Query took: 0.00034999847412109375 seconds
---
/* demystify: select information about namespace and identification based on the given identification method
*/
SELECT 'ns:' || NSDATA.NS_NAME || ' ' || IDDATA.ID, count(IDDATA.ID) as TOTAL
FROM IDDATA
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID
WHERE IDDATA.BASIS LIKE '%text match%' OR IDDATA.WARNING LIKE '%text match%'
GROUP BY NSDATA.NS_NAME, IDDATA.iD
ORDER BY TOTAL DESC
Query took: 0.00042557716369628906 seconds
---
/* demystify: select information about namespace and identification based on the given identification method
*/
SELECT 'ns:' || NSDATA.NS_NAME || ' ' || IDDATA.ID, count(IDDATA.ID) as TOTAL
FROM IDDATA
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID
WHERE IDDATA.BASIS LIKE '%match on filename%' OR IDDATA.WARNING LIKE '%match on filename%'
GROUP BY NSDATA.NS_NAME, IDDATA.iD
ORDER BY TOTAL DESC
Query took: 0.00043654441833496094 seconds
---
/* demystify: count of extensions for all signature and container identified files in the database
*/
SELECT COUNT(DISTINCT FILEDATA.EXT)
FROM FILEDATA
WHERE (FILEDATA.TYPE='File' OR FILEDATA.TYPE='Container')
Query took: 0.00018906593322753906 seconds
---
/* demystify: select information about all mimetypes recorded in the database where a mimetype is listed
*/
SELECT IDDATA.MIME_TYPE, COUNT(*) AS total
FROM IDRESULTS
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE IDRESULTS.ID_ID IN(4, 7, 10, 13, 16, 19, 22, 25, 31, 34, 37, 40, 43, 46, 49, 61, 64, 67, 70, 73, 76, 82, 85, 88, 91, 94, 97, 109, 112, 115, 118, 121, 124, 127, 130, 133, 136, 139, 142, 145, 148, 151, 157, 160, 163, 250, 253, 256, 259, 262, 265, 268, 271, 274, 277, 280, 283, 286, 289, 292, 295, 298, 301, 304, 307, 310, 313, 316, 319, 322, 325, 328, 331, 334, 337, 340, 343, 346, 349, 352, 355, 358, 361, 364, 367, 370, 373, 376, 379, 382, 385, 388, 391, 394, 397, 400, 403, 406, 409, 412, 418, 421, 424, 427, 430, 433, 436, 439, 442, 445, 448, 451, 454, 457, 460, 463, 466, 469, 472, 479, 485, 488, 491, 497, 500, 503, 506, 509, 512, 515, 518, 521, 524, 527, 530, 533, 536, 539, 542, 545, 548, 551, 554, 557, 560, 563, 566, 569, 572, 575, 578, 581, 584, 587, 590, 593, 596, 599, 602, 605, 608, 611, 614, 617, 620, 623, 641, 644, 647, 650, 653, 656, 659, 662, 665, 668, 671, 674, 677, 680, 683, 686, 689, 692, 695, 698, 701, 704, 707, 728, 731, 737, 740, 743, 746, 749, 752, 755, 758, 761, 764, 767, 770, 773, 776, 782, 788, 791, 794, 797, 800, 803, 806, 809, 812, 815, 818, 821, 824, 827, 830, 833, 836, 839, 842, 845, 848, 851, 854, 857, 860, 863, 866, 869, 872, 875, 878, 881, 884, 887, 890, 893, 896, 899, 902, 905, 908, 911, 914, 917, 920, 923, 926, 929, 932, 935, 938, 941, 944, 947, 950, 953, 956, 959, 962, 965, 968, 971, 417, 477, 780, 58, 106, 154, 494, 734, 53, 101, 786)AND (IDDATA.MIME_TYPE!='None' and IDDATA.MIME_TYPE!='none' and IDDATA.MIME_TYPE!='')
GROUP BY IDDATA.MIME_TYPE ORDER BY TOTAL DESC
Query took: 0.0006351470947265625 seconds
---
/* demystify: create a frequency listing of all 'last-modified' years in the database
*/
SELECT FILEDATA.YEAR, COUNT(FILEDATA.YEAR) AS total
FROM FILEDATA
WHERE (FILEDATA.TYPE='File' OR FILEDATA.TYPE='Container')
GROUP BY FILEDATA.YEAR ORDER BY TOTAL DESC
Query took: 0.00017642974853515625 seconds
---
/* demystify: select information about all signature or container identified files in the database
*/
SELECT NSDATA.NS_NAME, IDDATA.ID, COUNT(IDDATA.ID) as TOTAL
FROM IDRESULTS
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE (IDDATA.METHOD='Signature' OR IDDATA.METHOD='Container')
GROUP BY IDDATA.ID ORDER BY NSDATA.NS_NAME, TOTAL DESC
Query took: 0.0007278919219970703 seconds
---
/* demystify: select information about files identified by extension only
*/
SELECT DISTINCT IDDATA.ID, IDDATA.FORMAT_NAME
FROM IDDATA
WHERE (IDDATA.METHOD='Extension')
Query took: 0.00010704994201660156 seconds
---
/* demystify: count of files with multiple identifications
*/
SELECT count(FREQUENCY)
FROM (SELECT FILEDATA.FILE_PATH AS PATH, COUNT(FILEDATA.FILE_ID) AS FREQUENCY
FROM IDRESULTS
JOIN FILEDATA on IDRESULTS.FILE_ID = FILEDATA.FILE_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE (IDDATA.METHOD='Signature' or IDDATA.METHOD='Container')
GROUP BY FILEDATA.FILE_ID
ORDER BY COUNT(FILEDATA.FILE_ID) DESC)
WHERE FREQUENCY > 3
Query took: 0.0005881786346435547 seconds
---
/* demystify: select metadata about identifications for a given identification method
*/
SELECT 'ns:' || NSDATA.NS_NAME || ' ', IDDATA.ID
FROM IDRESULTS
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE IDDATA.METHOD='Extension'
OR IDRESULTS.FILE_ID IN (238, 241, 1)
Query took: 9.179115295410156e-05 seconds
---
/* demystify: select all unique file format extensions in the database
*/
SELECT DISTINCT FILEDATA.EXT
FROM FILEDATA
WHERE (FILEDATA.TYPE='File' OR FILEDATA.TYPE='Container')
AND FILEDATA.EXT!=''
Query took: 6.771087646484375e-05 seconds
---
/* demystify: create a frequency listing of file format extensions in the database
*/
SELECT FILEDATA.EXT, COUNT(*) AS total
FROM FILEDATA
WHERE (FILEDATA.TYPE='File' OR FILEDATA.TYPE='Container')
AND FILEDATA.EXT!=''
GROUP BY FILEDATA.EXT ORDER BY TOTAL DESC
Query took: 0.00024628639221191406 seconds
---
/* demystify: list extensions of container objects in the database
*/
SELECT DISTINCT FILEDATA.EXT FROM FILEDATA WHERE FILEDATA.TYPE='Container'
Query took: 4.8160552978515625e-05 seconds
---
/* demystify: select metadata about all results ordered by namespace
*/
SELECT 'ns:' || NSDATA.NS_NAME || ' ', IDDATA.ID, IDDATA.FORMAT_NAME, IDDATA.BASIS, IDDATA.FORMAT_VERSION, IDDATA.NS_ID, COUNT(IDDATA.ID) AS TOTAL
FROM IDRESULTS
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE IDRESULTS.ID_ID IN (4, 7, 10, 13, 16, 19, 22, 25, 31, 34, 37, 40, 43, 46, 49, 61, 64, 67, 70, 73, 76, 82, 85, 88, 91, 94, 97, 109, 112, 115, 118, 121, 124, 127, 130, 133, 136, 139, 142, 145, 148, 151, 157, 160, 163, 250, 253, 256, 259, 262, 265, 268, 271, 274, 277, 280, 283, 286, 289, 292, 295, 298, 301, 304, 307, 310, 313, 316, 319, 322, 325, 328, 331, 334, 337, 340, 343, 346, 349, 352, 355, 358, 361, 364, 367, 370, 373, 376, 379, 382, 385, 388, 391, 394, 397, 400, 403, 406, 409, 412, 418, 421, 424, 427, 430, 433, 436, 439, 442, 445, 448, 451, 454, 457, 460, 463, 466, 469, 472, 479, 485, 488, 491, 497, 500, 503, 506, 509, 512, 515, 518, 521, 524, 527, 530, 533, 536, 539, 542, 545, 548, 551, 554, 557, 560, 563, 566, 569, 572, 575, 578, 581, 584, 587, 590, 593, 596, 599, 602, 605, 608, 611, 614, 617, 620, 623, 641, 644, 647, 650, 653, 656, 659, 662, 665, 668, 671, 674, 677, 680, 683, 686, 689, 692, 695, 698, 701, 704, 707, 728, 731, 737, 740, 743, 746, 749, 752, 755, 758, 761, 764, 767, 770, 773, 776, 782, 788, 791, 794, 797, 800, 803, 806, 809, 812, 815, 818, 821, 824, 827, 830, 833, 836, 839, 842, 845, 848, 851, 854, 857, 860, 863, 866, 869, 872, 875, 878, 881, 884, 887, 890, 893, 896, 899, 902, 905, 908, 911, 914, 917, 920, 923, 926, 929, 932, 935, 938, 941, 944, 947, 950, 953, 956, 959, 962, 965, 968, 971, 417, 477, 780)
GROUP BY IDDATA.ID
ORDER BY
CASE IDDATA.NS_ID
WHEN '1' THEN 1
ELSE 2
END
Query took: 0.0007658004760742188 seconds
---
/* demystify: select metadata about all results ordered by namespace
*/
SELECT 'ns:' || NSDATA.NS_NAME || ' ', IDDATA.ID, IDDATA.FORMAT_NAME, IDDATA.BASIS, IDDATA.FORMAT_VERSION, IDDATA.NS_ID, COUNT(IDDATA.ID) AS TOTAL
FROM IDRESULTS
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE IDRESULTS.ID_ID IN (4, 7, 10, 13, 16, 19, 22, 25, 31, 34, 37, 40, 43, 46, 49, 61, 64, 67, 70, 73, 76, 82, 85, 88, 91, 94, 97, 109, 112, 115, 118, 121, 124, 127, 130, 133, 136, 139, 142, 145, 148, 151, 157, 160, 163, 250, 253, 256, 259, 262, 265, 268, 271, 274, 277, 280, 283, 286, 289, 292, 295, 298, 301, 304, 307, 310, 313, 316, 319, 322, 325, 328, 331, 334, 337, 340, 343, 346, 349, 352, 355, 358, 361, 364, 367, 370, 373, 376, 379, 382, 385, 388, 391, 394, 397, 400, 403, 406, 409, 412, 418, 421, 424, 427, 430, 433, 436, 439, 442, 445, 448, 451, 454, 457, 460, 463, 466, 469, 472, 479, 485, 488, 491, 497, 500, 503, 506, 509, 512, 515, 518, 521, 524, 527, 530, 533, 536, 539, 542, 545, 548, 551, 554, 557, 560, 563, 566, 569, 572, 575, 578, 581, 584, 587, 590, 593, 596, 599, 602, 605, 608, 611, 614, 617, 620, 623, 641, 644, 647, 650, 653, 656, 659, 662, 665, 668, 671, 674, 677, 680, 683, 686, 689, 692, 695, 698, 701, 704, 707, 728, 731, 737, 740, 743, 746, 749, 752, 755, 758, 761, 764, 767, 770, 773, 776, 782, 788, 791, 794, 797, 800, 803, 806, 809, 812, 815, 818, 821, 824, 827, 830, 833, 836, 839, 842, 845, 848, 851, 854, 857, 860, 863, 866, 869, 872, 875, 878, 881, 884, 887, 890, 893, 896, 899, 902, 905, 908, 911, 914, 917, 920, 923, 926, 929, 932, 935, 938, 941, 944, 947, 950, 953, 956, 959, 962, 965, 968, 971, 417, 477, 780)
GROUP BY IDDATA.ID
ORDER BY
CASE IDDATA.NS_ID
WHEN '1' THEN 1
ELSE 2
END
Query took: 0.0005586147308349609 seconds
---
/* demystify: select metadata about all results ordered by namespace
*/
SELECT 'ns:' || NSDATA.NS_NAME || ' ', IDDATA.ID, IDDATA.FORMAT_NAME, IDDATA.BASIS, IDDATA.FORMAT_VERSION, IDDATA.NS_ID, COUNT(IDDATA.ID) AS TOTAL
FROM IDRESULTS
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE IDRESULTS.ID_ID IN (58, 106, 154, 494, 734, 53, 101, 786)
GROUP BY IDDATA.ID
ORDER BY
CASE IDDATA.NS_ID
WHEN '1' THEN 1
ELSE 2
END
Query took: 0.0012404918670654297 seconds
---
/* demystify: select metadata about all results ordered by namespace
*/
SELECT 'ns:' || NSDATA.NS_NAME || ' ', IDDATA.ID, IDDATA.FORMAT_NAME, IDDATA.BASIS, IDDATA.FORMAT_VERSION, IDDATA.NS_ID, COUNT(IDDATA.ID) AS TOTAL
FROM IDRESULTS
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE IDRESULTS.ID_ID IN (174, 240)
GROUP BY IDDATA.ID
ORDER BY
CASE IDDATA.NS_ID
WHEN '1' THEN 1
ELSE 2
END
Query took: 0.0011506080627441406 seconds
---
/* demystify: select metadata about objects with a byte match identification
*/
SELECT DISTINCT IDDATA.BASIS, IDDATA.ID, FILEDATA.NAME, FILEDATA.SIZE
FROM IDRESULTS
JOIN FILEDATA on IDRESULTS.FILE_ID = FILEDATA.FILE_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE IDDATA.METHOD!='Container'
AND IDDATA.BASIS LIKE '%byte match%'
Query took: 0.00010251998901367188 seconds
---
/* demystify: create a frequency list of unique errors output by the identification tool
*/
SELECT FILEDATA.ERROR, COUNT(*) AS TOTAL
FROM FILEDATA
WHERE FILEDATA.TYPE!='Folder'
AND FILEDATA.ERROR!=''
AND FILEDATA.ERROR!='None'
GROUP BY FILEDATA.ERROR ORDER BY TOTAL DESC
Query took: 0.0001742839813232422 seconds
---
/* demystify: count of files from the database identified using signature or container methods for a given namespace
*/
SELECT COUNT(DISTINCT IDRESULTS.FILE_ID)
FROM IDRESULTS
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE NS_ID=1
AND (IDDATA.METHOD='Signature' or IDDATA.METHOD='Container')
Query took: 0.0002791881561279297 seconds
---
/* demystify: count all rows in the iddata table
*/
SELECT COUNT(*) FROM IDDATA
WHERE NS_ID=1
AND IDDATA.METHOD='XML'
Query took: 0.00013375282287597656 seconds
---
/* demystify: count all rows in the iddata table
*/
SELECT COUNT(*) FROM IDDATA
WHERE NS_ID=1
AND IDDATA.METHOD='Text'
Query took: 0.00012874603271484375 seconds
---
/* demystify: count all rows in the iddata table
*/
SELECT COUNT(*) FROM IDDATA
WHERE NS_ID=1
AND IDDATA.METHOD='Filename'
Query took: 0.0001723766326904297 seconds
---
/* demystify: count all rows in the iddata table
*/
SELECT COUNT(*) FROM IDDATA
WHERE NS_ID=1
AND IDDATA.METHOD='Extension'
Query took: 0.0001595020294189453 seconds
---
/* demystify: count of files from the database with multiple identifications for a given namespace id
*/
SELECT count(*)
FROM (SELECT COUNT(FILEDATA.FILE_ID) AS FREQUENCY
FROM IDRESULTS
JOIN FILEDATA on IDRESULTS.FILE_ID = FILEDATA.FILE_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE IDDATA.NS_ID=1
AND (IDDATA.METHOD='Signature' or IDDATA.METHOD='Container')
GROUP BY FILEDATA.FILE_ID
ORDER BY COUNT(FILEDATA.FILE_ID) DESC)
WHERE FREQUENCY >1
Query took: 0.00037288665771484375 seconds
---
/* demystify: count of files from the database identified using signature or container methods for a given namespace
*/
SELECT COUNT(DISTINCT IDRESULTS.FILE_ID)
FROM IDRESULTS
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE NS_ID=2
AND (IDDATA.METHOD='Signature' or IDDATA.METHOD='Container')
Query took: 0.00026226043701171875 seconds
---
/* demystify: count all rows in the iddata table
*/
SELECT COUNT(*) FROM IDDATA
WHERE NS_ID=2
AND IDDATA.METHOD='XML'
Query took: 0.00013256072998046875 seconds
---
/* demystify: count all rows in the iddata table
*/
SELECT COUNT(*) FROM IDDATA
WHERE NS_ID=2
AND IDDATA.METHOD='Text'
Query took: 0.00012636184692382812 seconds
---
/* demystify: count all rows in the iddata table
*/
SELECT COUNT(*) FROM IDDATA
WHERE NS_ID=2
AND IDDATA.METHOD='Filename'
Query took: 0.00012636184692382812 seconds
---
/* demystify: count all rows in the iddata table
*/
SELECT COUNT(*) FROM IDDATA
WHERE NS_ID=2
AND IDDATA.METHOD='Extension'
Query took: 0.0001251697540283203 seconds
---
/* demystify: count of files from the database with multiple identifications for a given namespace id
*/
SELECT count(*)
FROM (SELECT COUNT(FILEDATA.FILE_ID) AS FREQUENCY
FROM IDRESULTS
JOIN FILEDATA on IDRESULTS.FILE_ID = FILEDATA.FILE_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE IDDATA.NS_ID=2
AND (IDDATA.METHOD='Signature' or IDDATA.METHOD='Container')
GROUP BY FILEDATA.FILE_ID
ORDER BY COUNT(FILEDATA.FILE_ID) DESC)
WHERE FREQUENCY >2
Query took: 0.0003559589385986328 seconds
---
/* demystify: count of files from the database identified using signature or container methods for a given namespace
*/
SELECT COUNT(DISTINCT IDRESULTS.FILE_ID)
FROM IDRESULTS
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE NS_ID=3
AND (IDDATA.METHOD='Signature' or IDDATA.METHOD='Container')
Query took: 0.00025534629821777344 seconds
---
/* demystify: count all rows in the iddata table
*/
SELECT COUNT(*) FROM IDDATA
WHERE NS_ID=3
AND IDDATA.METHOD='XML'
Query took: 0.0001308917999267578 seconds
---
/* demystify: count all rows in the iddata table
*/
SELECT COUNT(*) FROM IDDATA
WHERE NS_ID=3
AND IDDATA.METHOD='Text'
Query took: 0.0001266002655029297 seconds
---
/* demystify: count all rows in the iddata table
*/
SELECT COUNT(*) FROM IDDATA
WHERE NS_ID=3
AND IDDATA.METHOD='Filename'
Query took: 0.00012683868408203125 seconds
---
/* demystify: count all rows in the iddata table
*/
SELECT COUNT(*) FROM IDDATA
WHERE NS_ID=3
AND IDDATA.METHOD='Extension'
Query took: 0.000125885009765625 seconds
---
/* demystify: count of files from the database with multiple identifications for a given namespace id
*/
SELECT count(*)
FROM (SELECT COUNT(FILEDATA.FILE_ID) AS FREQUENCY
FROM IDRESULTS
JOIN FILEDATA on IDRESULTS.FILE_ID = FILEDATA.FILE_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE IDDATA.NS_ID=3
AND (IDDATA.METHOD='Signature' or IDDATA.METHOD='Container')
GROUP BY FILEDATA.FILE_ID
ORDER BY COUNT(FILEDATA.FILE_ID) DESC)
WHERE FREQUENCY >3
Query took: 0.0003495216369628906 seconds
---
/* demystify: identify gaps across all namespaces used in an identification run
*/
SELECT IDRESULTS.FILE_ID
FROM IDRESULTS
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE (IDDATA.METHOD!='Signature' AND IDDATA.METHOD!='Container')
AND IDDATA.NS_ID=1
Query took: 5.7697296142578125e-05 seconds
---
/* demystify: identify gaps across all namespaces used in an identification run
*/
SELECT IDRESULTS.FILE_ID
FROM IDRESULTS
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE (IDDATA.METHOD!='Signature' AND IDDATA.METHOD!='Container')
AND IDDATA.NS_ID=2
Query took: 5.245208740234375e-05 seconds
---
/* demystify: identify gaps across all namespaces used in an identification run
*/
SELECT IDRESULTS.FILE_ID
FROM IDRESULTS
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE (IDDATA.METHOD!='Signature' AND IDDATA.METHOD!='Container')
AND IDDATA.NS_ID=3
Query took: 4.9591064453125e-05 seconds
---
/* demystify: select all filenames from the filedata table
*/
SELECT FILEDATA.NAME FROM FILEDATA
Query took: 4.5299530029296875e-05 seconds
---
/* demystify: select distinct file names from the database
*/
SELECT DISTINCT FILEDATA.DIR_NAME FROM FILEDATA
Query took: 4.76837158203125e-05 seconds
---
/* denylist: select file and id metadata from the database for a set of files in a given list of format ids
*/
SELECT DISTINCT FILEDATA.FILE_PATH, FILEDATA.NAME, IDDATA.ID || ': ' || IDDATA.FORMAT_NAME || ' ' || IDDATA.FORMAT_VERSION
FROM IDRESULTS
JOIN FILEDATA on IDRESULTS.FILE_ID = FILEDATA.FILE_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE IDDATA.METHOD in ('Text','XML','Container','Signature')
AND IDDATA.ID in ("fmt/111","fmt/682","fmt/394","x-fmt/409","x-fmt/410","x-fmt/411","fmt/688","fmt/689","fmt/690","fmt/691","fmt/468","fmt/473","fmt/474","fmt/503","fmt/523","fmt/819","x-fmt/157","x-fmt/418","x-fmt/419","x-fmt/428","x-fmt/429","x-fmt/453","application/x-sh","application/vnd.ms-tnef","application/x-stuffit","application/x-pak","application/x-mswinurl","application/x-executable","x-fmt/263","x-fmt/266","fmt/583","fmt/524")
Query took: 0.0002472400665283203 seconds
---
/* denylist: select file paths and file names from the database for a set of files with extensions in a given list
*/
SELECT DISTINCT FILEDATA.FILE_PATH, FILEDATA.NAME, FILEDATA.EXT
FROM FILEDATA
WHERE FILEDATA.TYPE != 'Folder'
AND FILEDATA.EXT in ("ini","exe","cfg","dll","lnk","tmp")
Query took: 0.00016832351684570312 seconds
---
/* denylist: select file paths and names from the database for file names in a given list
*/
SELECT DISTINCT FILEDATA.FILE_PATH, FILEDATA.NAME
FROM FILEDATA
WHERE FILEDATA.TYPE != 'Folder'
AND (FILEDATA.NAME LIKE "%.DS_Store%" or FILEDATA.NAME LIKE "%Untitled Document%" or FILEDATA.NAME LIKE "%desktop.ini%" or FILEDATA.NAME LIKE "%(copy%" or FILEDATA.NAME LIKE "%ZbThumbnail.info%" or FILEDATA.NAME LIKE "%lorem%" or FILEDATA.NAME LIKE "%New Microsoft Word Document%" or FILEDATA.NAME LIKE "%Bin.dat%" or FILEDATA.NAME LIKE "%Thumbs.db%" or FILEDATA.NAME LIKE "% vitae%" or FILEDATA.NAME LIKE "% Appointments%" or FILEDATA.NAME LIKE "% CV%" or FILEDATA.NAME LIKE "% Application%" or FILEDATA.NAME LIKE "% Resume%" or FILEDATA.NAME LIKE "% Appointment%" or FILEDATA.NAME LIKE "% Test%" or FILEDATA.NAME LIKE "% list%" or FILEDATA.NAME LIKE "% member%" or FILEDATA.NAME LIKE "% people%" or FILEDATA.NAME LIKE "% address%" or FILEDATA.NAME LIKE "% phone%")
Query took: 0.00013828277587890625 seconds
---
/* denylist: select file paths and names from the database for folder names in a given list
*/
SELECT DISTINCT FILEDATA.FILE_PATH, FILEDATA.NAME
FROM FILEDATA
WHERE FILEDATA.TYPE = 'Folder'
AND (FILEDATA.NAME LIKE "%Untitled Folder%" or FILEDATA.NAME LIKE "%New Folder%" or FILEDATA.NAME LIKE "%(copy%" or FILEDATA.NAME LIKE "%.git%" or FILEDATA.NAME LIKE "%lorem%")
Query took: 0.0001385211944580078 seconds
---
/* demystify: count the number of zero-byte files in the database
*/
SELECT COUNT(FILEDATA.SIZE) FROM FILEDATA WHERE (FILEDATA.TYPE!='Folder') AND (FILEDATA.SIZE='0')
Query took: 0.00010204315185546875 seconds
---
/* demystify: list the paths of the zero-byte files in the database
*/
SELECT FILEDATA.FILE_PATH FROM FILEDATA WHERE FILEDATA.TYPE!='Folder' AND FILEDATA.SIZE='0'
Query took: 4.76837158203125e-05 seconds
---
/* demystify: create a frequency listing of duplicate files in the database
*/
SELECT FILEDATA.HASH, COUNT(*) AS TOTAL
FROM FILEDATA
WHERE FILEDATA.TYPE='File' OR FILEDATA.TYPE='Container'
AND FILEDATA.HASH != ''
GROUP BY FILEDATA.HASH
HAVING TOTAL > 1
ORDER BY TOTAL DESC
Query took: 0.00033855438232421875 seconds
---
/* demystify: select paths from the database for files with a given checksum
*/
SELECT FILE_PATH FROM FILEDATA WHERE FILEDATA.HASH='da39a3ee5e6b4b0d3255bfef95601890afd80709' ORDER BY FILEDATA.FILE_PATH;
Query took: 7.82012939453125e-05 seconds
---
/* demystify: select paths from the database for files with a given checksum
*/
SELECT FILE_PATH FROM FILEDATA WHERE FILEDATA.HASH='3611abfafc52dfd56482101f7d26858a451a4f58' ORDER BY FILEDATA.FILE_PATH;
Query took: 4.5299530029296875e-05 seconds
---
/* demystify: select paths from the database for files with a given checksum
*/
SELECT FILE_PATH FROM FILEDATA WHERE FILEDATA.HASH='e12dbf403f935a77ff0b045d97767788c62ff24d' ORDER BY FILEDATA.FILE_PATH;
Query took: 4.00543212890625e-05 seconds
---
/* rogues: select all non-folder file paths from the filedata table
*/
SELECT DISTINCT FILE_PATH FROM FILEDATA WHERE FILEDATA.TYPE != 'Folder'
Query took: 4.076957702636719e-05 seconds
---
/* rogues: select all 'folder' file paths from the filedata table
*/
SELECT DISTINCT FILE_PATH FROM FILEDATA WHERE FILEDATA.TYPE = 'Folder'
Query took: 7.510185241699219e-05 seconds
---
/* rogues: select files from the database that have extension mismatches
*/
SELECT DISTINCT FILEDATA.FILE_PATH
FROM IDRESULTS
JOIN FILEDATA on IDRESULTS.FILE_ID = FILEDATA.FILE_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
AND (IDDATA.EXTENSION_MISMATCH='True')
Query took: 7.43865966796875e-05 seconds
---
/* rogues: select files identified using signatures or containers using PRONOM
*/
SELECT DISTINCT FILEDATA.FILE_PATH
FROM IDRESULTS
JOIN FILEDATA on IDRESULTS.FILE_ID = FILEDATA.FILE_ID
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID
WHERE (IDDATA.METHOD != 'Container' AND IDDATA.METHOD != 'Signature')
AND FILEDATA.TYPE != 'Folder'
AND IDDATA.NS_ID=1
Query took: 8.606910705566406e-05 seconds
---
/* rogues: select file paths from the database where the name is in a given list
*/
SELECT DISTINCT FILEDATA.FILE_PATH FROM FILEDATA WHERE NAME IN ("[","[]","]","£","¬")
Query took: 6.270408630371094e-05 seconds
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment