-
-
Save ross-spencer/1f6cf16c9f966b980db277f7a932d81b to your computer and use it in GitHub Desktop.
demystify analysis queries
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
/* demystify: select the format identification tool used | |
*/ | |
SELECT DBMD.TOOL_TYPE FROM DBMD | |
Query took: 0.0002048015594482422 seconds | |
--- | |
/* demystify: select all namespace data | |
*/ | |
SELECT * FROM NSDATA | |
Query took: 4.2438507080078125e-05 seconds | |
--- | |
/* demystify: count the number of namespaces used by an identification report | |
*/ | |
SELECT COUNT(NSDATA.NS_ID) FROM NSDATA | |
Query took: 4.029273986816406e-05 seconds | |
--- | |
/* demystify: select the checksum type used by the database | |
*/ | |
SELECT DBMD.HASH_TYPE FROM DBMD | |
Query took: 5.91278076171875e-05 seconds | |
--- | |
/* demystify: sum the total bytes used by all files in the database | |
*/ | |
SELECT SUM(FILEDATA.SIZE) FROM FILEDATA | |
Query took: 0.00011754035949707031 seconds | |
--- | |
/* demystify: count all files in the database | |
*/ | |
SELECT COUNT(FILEDATA.FILE_ID) FROM FILEDATA WHERE (FILEDATA.TYPE='File' OR FILEDATA.TYPE='Container') | |
Query took: 0.00010251998901367188 seconds | |
--- | |
/* demystify: count the number of container files in the database | |
*/ | |
SELECT COUNT(FILEDATA.FILE_ID) FROM FILEDATA WHERE FILEDATA.TYPE='Container' | |
Query took: 7.963180541992188e-05 seconds | |
--- | |
/* demystify: count the number of files in container objects, e.g. zip | |
*/ | |
SELECT COUNT(FILEDATA.FILE_ID) FROM FILEDATA WHERE (FILEDATA.URI_SCHEME!='file') AND (FILEDATA.TYPE='File' OR FILEDATA.TYPE='Container') | |
Query took: 7.891654968261719e-05 seconds | |
--- | |
/* demystify: count the number of unique identifications | |
*/ | |
SELECT COUNT(FILEDATA.FILE_ID) FROM FILEDATA WHERE FILEDATA.TYPE='Folder' | |
Query took: 7.772445678710938e-05 seconds | |
--- | |
/* demystify: count the number of distinct files in the database | |
*/ | |
SELECT COUNT(DISTINCT FILEDATA.NAME) FROM FILEDATA WHERE (FILEDATA.TYPE='File' OR FILEDATA.TYPE='Container') | |
Query took: 0.0002658367156982422 seconds | |
--- | |
/* demystify: count the number of unique directories in the database | |
*/ | |
SELECT COUNT(DISTINCT FILEDATA.DIR_NAME) FROM FILEDATA | |
Query took: 0.00015354156494140625 seconds | |
--- | |
/* demystify: select all identification results across the database and order by namespace | |
*/ | |
SELECT IDRESULTS.FILE_ID, IDDATA.ID_ID, IDDATA.METHOD, IDDATA.NS_ID | |
FROM IDRESULTS | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
ORDER BY | |
CASE IDDATA.NS_ID | |
WHEN '1' THEN 1 | |
ELSE 2 | |
END | |
Query took: 0.0004756450653076172 seconds | |
--- | |
/* demystify: count of all extension mismatches | |
*/ | |
SELECT COUNT(distinct(IDRESULTS.FILE_ID)) | |
FROM IDRESULTS | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE IDDATA.EXTENSION_MISMATCH='True' | |
Query took: 0.0002655982971191406 seconds | |
--- | |
/* demystify: count of pronom signature or container identification results | |
*/ | |
SELECT COUNT(DISTINCT IDDATA.ID) | |
FROM IDRESULTS | |
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE (NSDATA.NS_NAME='pronom') | |
AND (IDDATA.METHOD='Signature' OR IDDATA.METHOD='Container') | |
Query took: 0.0003762245178222656 seconds | |
--- | |
/* demystify: count of non-pronom signature or container identification results | |
*/ | |
SELECT COUNT(DISTINCT IDDATA.ID) | |
FROM IDRESULTS | |
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE (NSDATA.NS_NAME!='pronom') | |
AND (IDDATA.METHOD='Signature' OR IDDATA.METHOD='Container') | |
Query took: 0.00039458274841308594 seconds | |
--- | |
/* demystify: count all identification results for all identifiers as one | |
*/ | |
SELECT COUNT(DISTINCT IDMETHOD) | |
FROM (SELECT IDRESULTS.FILE_ID, IDDATA.ID as IDMETHOD | |
FROM IDRESULTS | |
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
AND (IDDATA.METHOD='XML')) | |
Query took: 0.0002090930938720703 seconds | |
--- | |
/* demystify: count all identification results for all identifiers as one | |
*/ | |
SELECT COUNT(DISTINCT IDMETHOD) | |
FROM (SELECT IDRESULTS.FILE_ID, IDDATA.ID as IDMETHOD | |
FROM IDRESULTS | |
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
AND (IDDATA.METHOD='Text')) | |
Query took: 0.00020956993103027344 seconds | |
--- | |
/* demystify: count all identification results for all identifiers as one | |
*/ | |
SELECT COUNT(DISTINCT IDMETHOD) | |
FROM (SELECT IDRESULTS.FILE_ID, IDDATA.ID as IDMETHOD | |
FROM IDRESULTS | |
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
AND (IDDATA.METHOD='Filename')) | |
Query took: 0.0002110004425048828 seconds | |
--- | |
/* demystify: select information about namespace and identification based on the given identification method | |
*/ | |
SELECT 'ns:' || NSDATA.NS_NAME || ' ' || IDDATA.ID, count(IDDATA.ID) as TOTAL | |
FROM IDDATA | |
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID | |
WHERE IDDATA.BASIS LIKE '%xml match%' OR IDDATA.WARNING LIKE '%xml match%' | |
GROUP BY NSDATA.NS_NAME, IDDATA.iD | |
ORDER BY TOTAL DESC | |
Query took: 0.00034999847412109375 seconds | |
--- | |
/* demystify: select information about namespace and identification based on the given identification method | |
*/ | |
SELECT 'ns:' || NSDATA.NS_NAME || ' ' || IDDATA.ID, count(IDDATA.ID) as TOTAL | |
FROM IDDATA | |
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID | |
WHERE IDDATA.BASIS LIKE '%text match%' OR IDDATA.WARNING LIKE '%text match%' | |
GROUP BY NSDATA.NS_NAME, IDDATA.iD | |
ORDER BY TOTAL DESC | |
Query took: 0.00042557716369628906 seconds | |
--- | |
/* demystify: select information about namespace and identification based on the given identification method | |
*/ | |
SELECT 'ns:' || NSDATA.NS_NAME || ' ' || IDDATA.ID, count(IDDATA.ID) as TOTAL | |
FROM IDDATA | |
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID | |
WHERE IDDATA.BASIS LIKE '%match on filename%' OR IDDATA.WARNING LIKE '%match on filename%' | |
GROUP BY NSDATA.NS_NAME, IDDATA.iD | |
ORDER BY TOTAL DESC | |
Query took: 0.00043654441833496094 seconds | |
--- | |
/* demystify: count of extensions for all signature and container identified files in the database | |
*/ | |
SELECT COUNT(DISTINCT FILEDATA.EXT) | |
FROM FILEDATA | |
WHERE (FILEDATA.TYPE='File' OR FILEDATA.TYPE='Container') | |
Query took: 0.00018906593322753906 seconds | |
--- | |
/* demystify: select information about all mimetypes recorded in the database where a mimetype is listed | |
*/ | |
SELECT IDDATA.MIME_TYPE, COUNT(*) AS total | |
FROM IDRESULTS | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE IDRESULTS.ID_ID IN(4, 7, 10, 13, 16, 19, 22, 25, 31, 34, 37, 40, 43, 46, 49, 61, 64, 67, 70, 73, 76, 82, 85, 88, 91, 94, 97, 109, 112, 115, 118, 121, 124, 127, 130, 133, 136, 139, 142, 145, 148, 151, 157, 160, 163, 250, 253, 256, 259, 262, 265, 268, 271, 274, 277, 280, 283, 286, 289, 292, 295, 298, 301, 304, 307, 310, 313, 316, 319, 322, 325, 328, 331, 334, 337, 340, 343, 346, 349, 352, 355, 358, 361, 364, 367, 370, 373, 376, 379, 382, 385, 388, 391, 394, 397, 400, 403, 406, 409, 412, 418, 421, 424, 427, 430, 433, 436, 439, 442, 445, 448, 451, 454, 457, 460, 463, 466, 469, 472, 479, 485, 488, 491, 497, 500, 503, 506, 509, 512, 515, 518, 521, 524, 527, 530, 533, 536, 539, 542, 545, 548, 551, 554, 557, 560, 563, 566, 569, 572, 575, 578, 581, 584, 587, 590, 593, 596, 599, 602, 605, 608, 611, 614, 617, 620, 623, 641, 644, 647, 650, 653, 656, 659, 662, 665, 668, 671, 674, 677, 680, 683, 686, 689, 692, 695, 698, 701, 704, 707, 728, 731, 737, 740, 743, 746, 749, 752, 755, 758, 761, 764, 767, 770, 773, 776, 782, 788, 791, 794, 797, 800, 803, 806, 809, 812, 815, 818, 821, 824, 827, 830, 833, 836, 839, 842, 845, 848, 851, 854, 857, 860, 863, 866, 869, 872, 875, 878, 881, 884, 887, 890, 893, 896, 899, 902, 905, 908, 911, 914, 917, 920, 923, 926, 929, 932, 935, 938, 941, 944, 947, 950, 953, 956, 959, 962, 965, 968, 971, 417, 477, 780, 58, 106, 154, 494, 734, 53, 101, 786)AND (IDDATA.MIME_TYPE!='None' and IDDATA.MIME_TYPE!='none' and IDDATA.MIME_TYPE!='') | |
GROUP BY IDDATA.MIME_TYPE ORDER BY TOTAL DESC | |
Query took: 0.0006351470947265625 seconds | |
--- | |
/* demystify: create a frequency listing of all 'last-modified' years in the database | |
*/ | |
SELECT FILEDATA.YEAR, COUNT(FILEDATA.YEAR) AS total | |
FROM FILEDATA | |
WHERE (FILEDATA.TYPE='File' OR FILEDATA.TYPE='Container') | |
GROUP BY FILEDATA.YEAR ORDER BY TOTAL DESC | |
Query took: 0.00017642974853515625 seconds | |
--- | |
/* demystify: select information about all signature or container identified files in the database | |
*/ | |
SELECT NSDATA.NS_NAME, IDDATA.ID, COUNT(IDDATA.ID) as TOTAL | |
FROM IDRESULTS | |
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE (IDDATA.METHOD='Signature' OR IDDATA.METHOD='Container') | |
GROUP BY IDDATA.ID ORDER BY NSDATA.NS_NAME, TOTAL DESC | |
Query took: 0.0007278919219970703 seconds | |
--- | |
/* demystify: select information about files identified by extension only | |
*/ | |
SELECT DISTINCT IDDATA.ID, IDDATA.FORMAT_NAME | |
FROM IDDATA | |
WHERE (IDDATA.METHOD='Extension') | |
Query took: 0.00010704994201660156 seconds | |
--- | |
/* demystify: count of files with multiple identifications | |
*/ | |
SELECT count(FREQUENCY) | |
FROM (SELECT FILEDATA.FILE_PATH AS PATH, COUNT(FILEDATA.FILE_ID) AS FREQUENCY | |
FROM IDRESULTS | |
JOIN FILEDATA on IDRESULTS.FILE_ID = FILEDATA.FILE_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE (IDDATA.METHOD='Signature' or IDDATA.METHOD='Container') | |
GROUP BY FILEDATA.FILE_ID | |
ORDER BY COUNT(FILEDATA.FILE_ID) DESC) | |
WHERE FREQUENCY > 3 | |
Query took: 0.0005881786346435547 seconds | |
--- | |
/* demystify: select metadata about identifications for a given identification method | |
*/ | |
SELECT 'ns:' || NSDATA.NS_NAME || ' ', IDDATA.ID | |
FROM IDRESULTS | |
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE IDDATA.METHOD='Extension' | |
OR IDRESULTS.FILE_ID IN (238, 241, 1) | |
Query took: 9.179115295410156e-05 seconds | |
--- | |
/* demystify: select all unique file format extensions in the database | |
*/ | |
SELECT DISTINCT FILEDATA.EXT | |
FROM FILEDATA | |
WHERE (FILEDATA.TYPE='File' OR FILEDATA.TYPE='Container') | |
AND FILEDATA.EXT!='' | |
Query took: 6.771087646484375e-05 seconds | |
--- | |
/* demystify: create a frequency listing of file format extensions in the database | |
*/ | |
SELECT FILEDATA.EXT, COUNT(*) AS total | |
FROM FILEDATA | |
WHERE (FILEDATA.TYPE='File' OR FILEDATA.TYPE='Container') | |
AND FILEDATA.EXT!='' | |
GROUP BY FILEDATA.EXT ORDER BY TOTAL DESC | |
Query took: 0.00024628639221191406 seconds | |
--- | |
/* demystify: list extensions of container objects in the database | |
*/ | |
SELECT DISTINCT FILEDATA.EXT FROM FILEDATA WHERE FILEDATA.TYPE='Container' | |
Query took: 4.8160552978515625e-05 seconds | |
--- | |
/* demystify: select metadata about all results ordered by namespace | |
*/ | |
SELECT 'ns:' || NSDATA.NS_NAME || ' ', IDDATA.ID, IDDATA.FORMAT_NAME, IDDATA.BASIS, IDDATA.FORMAT_VERSION, IDDATA.NS_ID, COUNT(IDDATA.ID) AS TOTAL | |
FROM IDRESULTS | |
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE IDRESULTS.ID_ID IN (4, 7, 10, 13, 16, 19, 22, 25, 31, 34, 37, 40, 43, 46, 49, 61, 64, 67, 70, 73, 76, 82, 85, 88, 91, 94, 97, 109, 112, 115, 118, 121, 124, 127, 130, 133, 136, 139, 142, 145, 148, 151, 157, 160, 163, 250, 253, 256, 259, 262, 265, 268, 271, 274, 277, 280, 283, 286, 289, 292, 295, 298, 301, 304, 307, 310, 313, 316, 319, 322, 325, 328, 331, 334, 337, 340, 343, 346, 349, 352, 355, 358, 361, 364, 367, 370, 373, 376, 379, 382, 385, 388, 391, 394, 397, 400, 403, 406, 409, 412, 418, 421, 424, 427, 430, 433, 436, 439, 442, 445, 448, 451, 454, 457, 460, 463, 466, 469, 472, 479, 485, 488, 491, 497, 500, 503, 506, 509, 512, 515, 518, 521, 524, 527, 530, 533, 536, 539, 542, 545, 548, 551, 554, 557, 560, 563, 566, 569, 572, 575, 578, 581, 584, 587, 590, 593, 596, 599, 602, 605, 608, 611, 614, 617, 620, 623, 641, 644, 647, 650, 653, 656, 659, 662, 665, 668, 671, 674, 677, 680, 683, 686, 689, 692, 695, 698, 701, 704, 707, 728, 731, 737, 740, 743, 746, 749, 752, 755, 758, 761, 764, 767, 770, 773, 776, 782, 788, 791, 794, 797, 800, 803, 806, 809, 812, 815, 818, 821, 824, 827, 830, 833, 836, 839, 842, 845, 848, 851, 854, 857, 860, 863, 866, 869, 872, 875, 878, 881, 884, 887, 890, 893, 896, 899, 902, 905, 908, 911, 914, 917, 920, 923, 926, 929, 932, 935, 938, 941, 944, 947, 950, 953, 956, 959, 962, 965, 968, 971, 417, 477, 780) | |
GROUP BY IDDATA.ID | |
ORDER BY | |
CASE IDDATA.NS_ID | |
WHEN '1' THEN 1 | |
ELSE 2 | |
END | |
Query took: 0.0007658004760742188 seconds | |
--- | |
/* demystify: select metadata about all results ordered by namespace | |
*/ | |
SELECT 'ns:' || NSDATA.NS_NAME || ' ', IDDATA.ID, IDDATA.FORMAT_NAME, IDDATA.BASIS, IDDATA.FORMAT_VERSION, IDDATA.NS_ID, COUNT(IDDATA.ID) AS TOTAL | |
FROM IDRESULTS | |
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE IDRESULTS.ID_ID IN (4, 7, 10, 13, 16, 19, 22, 25, 31, 34, 37, 40, 43, 46, 49, 61, 64, 67, 70, 73, 76, 82, 85, 88, 91, 94, 97, 109, 112, 115, 118, 121, 124, 127, 130, 133, 136, 139, 142, 145, 148, 151, 157, 160, 163, 250, 253, 256, 259, 262, 265, 268, 271, 274, 277, 280, 283, 286, 289, 292, 295, 298, 301, 304, 307, 310, 313, 316, 319, 322, 325, 328, 331, 334, 337, 340, 343, 346, 349, 352, 355, 358, 361, 364, 367, 370, 373, 376, 379, 382, 385, 388, 391, 394, 397, 400, 403, 406, 409, 412, 418, 421, 424, 427, 430, 433, 436, 439, 442, 445, 448, 451, 454, 457, 460, 463, 466, 469, 472, 479, 485, 488, 491, 497, 500, 503, 506, 509, 512, 515, 518, 521, 524, 527, 530, 533, 536, 539, 542, 545, 548, 551, 554, 557, 560, 563, 566, 569, 572, 575, 578, 581, 584, 587, 590, 593, 596, 599, 602, 605, 608, 611, 614, 617, 620, 623, 641, 644, 647, 650, 653, 656, 659, 662, 665, 668, 671, 674, 677, 680, 683, 686, 689, 692, 695, 698, 701, 704, 707, 728, 731, 737, 740, 743, 746, 749, 752, 755, 758, 761, 764, 767, 770, 773, 776, 782, 788, 791, 794, 797, 800, 803, 806, 809, 812, 815, 818, 821, 824, 827, 830, 833, 836, 839, 842, 845, 848, 851, 854, 857, 860, 863, 866, 869, 872, 875, 878, 881, 884, 887, 890, 893, 896, 899, 902, 905, 908, 911, 914, 917, 920, 923, 926, 929, 932, 935, 938, 941, 944, 947, 950, 953, 956, 959, 962, 965, 968, 971, 417, 477, 780) | |
GROUP BY IDDATA.ID | |
ORDER BY | |
CASE IDDATA.NS_ID | |
WHEN '1' THEN 1 | |
ELSE 2 | |
END | |
Query took: 0.0005586147308349609 seconds | |
--- | |
/* demystify: select metadata about all results ordered by namespace | |
*/ | |
SELECT 'ns:' || NSDATA.NS_NAME || ' ', IDDATA.ID, IDDATA.FORMAT_NAME, IDDATA.BASIS, IDDATA.FORMAT_VERSION, IDDATA.NS_ID, COUNT(IDDATA.ID) AS TOTAL | |
FROM IDRESULTS | |
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE IDRESULTS.ID_ID IN (58, 106, 154, 494, 734, 53, 101, 786) | |
GROUP BY IDDATA.ID | |
ORDER BY | |
CASE IDDATA.NS_ID | |
WHEN '1' THEN 1 | |
ELSE 2 | |
END | |
Query took: 0.0012404918670654297 seconds | |
--- | |
/* demystify: select metadata about all results ordered by namespace | |
*/ | |
SELECT 'ns:' || NSDATA.NS_NAME || ' ', IDDATA.ID, IDDATA.FORMAT_NAME, IDDATA.BASIS, IDDATA.FORMAT_VERSION, IDDATA.NS_ID, COUNT(IDDATA.ID) AS TOTAL | |
FROM IDRESULTS | |
JOIN NSDATA on IDDATA.NS_ID = NSDATA.NS_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE IDRESULTS.ID_ID IN (174, 240) | |
GROUP BY IDDATA.ID | |
ORDER BY | |
CASE IDDATA.NS_ID | |
WHEN '1' THEN 1 | |
ELSE 2 | |
END | |
Query took: 0.0011506080627441406 seconds | |
--- | |
/* demystify: select metadata about objects with a byte match identification | |
*/ | |
SELECT DISTINCT IDDATA.BASIS, IDDATA.ID, FILEDATA.NAME, FILEDATA.SIZE | |
FROM IDRESULTS | |
JOIN FILEDATA on IDRESULTS.FILE_ID = FILEDATA.FILE_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE IDDATA.METHOD!='Container' | |
AND IDDATA.BASIS LIKE '%byte match%' | |
Query took: 0.00010251998901367188 seconds | |
--- | |
/* demystify: create a frequency list of unique errors output by the identification tool | |
*/ | |
SELECT FILEDATA.ERROR, COUNT(*) AS TOTAL | |
FROM FILEDATA | |
WHERE FILEDATA.TYPE!='Folder' | |
AND FILEDATA.ERROR!='' | |
AND FILEDATA.ERROR!='None' | |
GROUP BY FILEDATA.ERROR ORDER BY TOTAL DESC | |
Query took: 0.0001742839813232422 seconds | |
--- | |
/* demystify: count of files from the database identified using signature or container methods for a given namespace | |
*/ | |
SELECT COUNT(DISTINCT IDRESULTS.FILE_ID) | |
FROM IDRESULTS | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE NS_ID=1 | |
AND (IDDATA.METHOD='Signature' or IDDATA.METHOD='Container') | |
Query took: 0.0002791881561279297 seconds | |
--- | |
/* demystify: count all rows in the iddata table | |
*/ | |
SELECT COUNT(*) FROM IDDATA | |
WHERE NS_ID=1 | |
AND IDDATA.METHOD='XML' | |
Query took: 0.00013375282287597656 seconds | |
--- | |
/* demystify: count all rows in the iddata table | |
*/ | |
SELECT COUNT(*) FROM IDDATA | |
WHERE NS_ID=1 | |
AND IDDATA.METHOD='Text' | |
Query took: 0.00012874603271484375 seconds | |
--- | |
/* demystify: count all rows in the iddata table | |
*/ | |
SELECT COUNT(*) FROM IDDATA | |
WHERE NS_ID=1 | |
AND IDDATA.METHOD='Filename' | |
Query took: 0.0001723766326904297 seconds | |
--- | |
/* demystify: count all rows in the iddata table | |
*/ | |
SELECT COUNT(*) FROM IDDATA | |
WHERE NS_ID=1 | |
AND IDDATA.METHOD='Extension' | |
Query took: 0.0001595020294189453 seconds | |
--- | |
/* demystify: count of files from the database with multiple identifications for a given namespace id | |
*/ | |
SELECT count(*) | |
FROM (SELECT COUNT(FILEDATA.FILE_ID) AS FREQUENCY | |
FROM IDRESULTS | |
JOIN FILEDATA on IDRESULTS.FILE_ID = FILEDATA.FILE_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE IDDATA.NS_ID=1 | |
AND (IDDATA.METHOD='Signature' or IDDATA.METHOD='Container') | |
GROUP BY FILEDATA.FILE_ID | |
ORDER BY COUNT(FILEDATA.FILE_ID) DESC) | |
WHERE FREQUENCY >1 | |
Query took: 0.00037288665771484375 seconds | |
--- | |
/* demystify: count of files from the database identified using signature or container methods for a given namespace | |
*/ | |
SELECT COUNT(DISTINCT IDRESULTS.FILE_ID) | |
FROM IDRESULTS | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE NS_ID=2 | |
AND (IDDATA.METHOD='Signature' or IDDATA.METHOD='Container') | |
Query took: 0.00026226043701171875 seconds | |
--- | |
/* demystify: count all rows in the iddata table | |
*/ | |
SELECT COUNT(*) FROM IDDATA | |
WHERE NS_ID=2 | |
AND IDDATA.METHOD='XML' | |
Query took: 0.00013256072998046875 seconds | |
--- | |
/* demystify: count all rows in the iddata table | |
*/ | |
SELECT COUNT(*) FROM IDDATA | |
WHERE NS_ID=2 | |
AND IDDATA.METHOD='Text' | |
Query took: 0.00012636184692382812 seconds | |
--- | |
/* demystify: count all rows in the iddata table | |
*/ | |
SELECT COUNT(*) FROM IDDATA | |
WHERE NS_ID=2 | |
AND IDDATA.METHOD='Filename' | |
Query took: 0.00012636184692382812 seconds | |
--- | |
/* demystify: count all rows in the iddata table | |
*/ | |
SELECT COUNT(*) FROM IDDATA | |
WHERE NS_ID=2 | |
AND IDDATA.METHOD='Extension' | |
Query took: 0.0001251697540283203 seconds | |
--- | |
/* demystify: count of files from the database with multiple identifications for a given namespace id | |
*/ | |
SELECT count(*) | |
FROM (SELECT COUNT(FILEDATA.FILE_ID) AS FREQUENCY | |
FROM IDRESULTS | |
JOIN FILEDATA on IDRESULTS.FILE_ID = FILEDATA.FILE_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE IDDATA.NS_ID=2 | |
AND (IDDATA.METHOD='Signature' or IDDATA.METHOD='Container') | |
GROUP BY FILEDATA.FILE_ID | |
ORDER BY COUNT(FILEDATA.FILE_ID) DESC) | |
WHERE FREQUENCY >2 | |
Query took: 0.0003559589385986328 seconds | |
--- | |
/* demystify: count of files from the database identified using signature or container methods for a given namespace | |
*/ | |
SELECT COUNT(DISTINCT IDRESULTS.FILE_ID) | |
FROM IDRESULTS | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE NS_ID=3 | |
AND (IDDATA.METHOD='Signature' or IDDATA.METHOD='Container') | |
Query took: 0.00025534629821777344 seconds | |
--- | |
/* demystify: count all rows in the iddata table | |
*/ | |
SELECT COUNT(*) FROM IDDATA | |
WHERE NS_ID=3 | |
AND IDDATA.METHOD='XML' | |
Query took: 0.0001308917999267578 seconds | |
--- | |
/* demystify: count all rows in the iddata table | |
*/ | |
SELECT COUNT(*) FROM IDDATA | |
WHERE NS_ID=3 | |
AND IDDATA.METHOD='Text' | |
Query took: 0.0001266002655029297 seconds | |
--- | |
/* demystify: count all rows in the iddata table | |
*/ | |
SELECT COUNT(*) FROM IDDATA | |
WHERE NS_ID=3 | |
AND IDDATA.METHOD='Filename' | |
Query took: 0.00012683868408203125 seconds | |
--- | |
/* demystify: count all rows in the iddata table | |
*/ | |
SELECT COUNT(*) FROM IDDATA | |
WHERE NS_ID=3 | |
AND IDDATA.METHOD='Extension' | |
Query took: 0.000125885009765625 seconds | |
--- | |
/* demystify: count of files from the database with multiple identifications for a given namespace id | |
*/ | |
SELECT count(*) | |
FROM (SELECT COUNT(FILEDATA.FILE_ID) AS FREQUENCY | |
FROM IDRESULTS | |
JOIN FILEDATA on IDRESULTS.FILE_ID = FILEDATA.FILE_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE IDDATA.NS_ID=3 | |
AND (IDDATA.METHOD='Signature' or IDDATA.METHOD='Container') | |
GROUP BY FILEDATA.FILE_ID | |
ORDER BY COUNT(FILEDATA.FILE_ID) DESC) | |
WHERE FREQUENCY >3 | |
Query took: 0.0003495216369628906 seconds | |
--- | |
/* demystify: identify gaps across all namespaces used in an identification run | |
*/ | |
SELECT IDRESULTS.FILE_ID | |
FROM IDRESULTS | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE (IDDATA.METHOD!='Signature' AND IDDATA.METHOD!='Container') | |
AND IDDATA.NS_ID=1 | |
Query took: 5.7697296142578125e-05 seconds | |
--- | |
/* demystify: identify gaps across all namespaces used in an identification run | |
*/ | |
SELECT IDRESULTS.FILE_ID | |
FROM IDRESULTS | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE (IDDATA.METHOD!='Signature' AND IDDATA.METHOD!='Container') | |
AND IDDATA.NS_ID=2 | |
Query took: 5.245208740234375e-05 seconds | |
--- | |
/* demystify: identify gaps across all namespaces used in an identification run | |
*/ | |
SELECT IDRESULTS.FILE_ID | |
FROM IDRESULTS | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE (IDDATA.METHOD!='Signature' AND IDDATA.METHOD!='Container') | |
AND IDDATA.NS_ID=3 | |
Query took: 4.9591064453125e-05 seconds | |
--- | |
/* demystify: select all filenames from the filedata table | |
*/ | |
SELECT FILEDATA.NAME FROM FILEDATA | |
Query took: 4.5299530029296875e-05 seconds | |
--- | |
/* demystify: select distinct file names from the database | |
*/ | |
SELECT DISTINCT FILEDATA.DIR_NAME FROM FILEDATA | |
Query took: 4.76837158203125e-05 seconds | |
--- | |
/* denylist: select file and id metadata from the database for a set of files in a given list of format ids | |
*/ | |
SELECT DISTINCT FILEDATA.FILE_PATH, FILEDATA.NAME, IDDATA.ID || ': ' || IDDATA.FORMAT_NAME || ' ' || IDDATA.FORMAT_VERSION | |
FROM IDRESULTS | |
JOIN FILEDATA on IDRESULTS.FILE_ID = FILEDATA.FILE_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE IDDATA.METHOD in ('Text','XML','Container','Signature') | |
AND IDDATA.ID in ("fmt/111","fmt/682","fmt/394","x-fmt/409","x-fmt/410","x-fmt/411","fmt/688","fmt/689","fmt/690","fmt/691","fmt/468","fmt/473","fmt/474","fmt/503","fmt/523","fmt/819","x-fmt/157","x-fmt/418","x-fmt/419","x-fmt/428","x-fmt/429","x-fmt/453","application/x-sh","application/vnd.ms-tnef","application/x-stuffit","application/x-pak","application/x-mswinurl","application/x-executable","x-fmt/263","x-fmt/266","fmt/583","fmt/524") | |
Query took: 0.0002472400665283203 seconds | |
--- | |
/* denylist: select file paths and file names from the database for a set of files with extensions in a given list | |
*/ | |
SELECT DISTINCT FILEDATA.FILE_PATH, FILEDATA.NAME, FILEDATA.EXT | |
FROM FILEDATA | |
WHERE FILEDATA.TYPE != 'Folder' | |
AND FILEDATA.EXT in ("ini","exe","cfg","dll","lnk","tmp") | |
Query took: 0.00016832351684570312 seconds | |
--- | |
/* denylist: select file paths and names from the database for file names in a given list | |
*/ | |
SELECT DISTINCT FILEDATA.FILE_PATH, FILEDATA.NAME | |
FROM FILEDATA | |
WHERE FILEDATA.TYPE != 'Folder' | |
AND (FILEDATA.NAME LIKE "%.DS_Store%" or FILEDATA.NAME LIKE "%Untitled Document%" or FILEDATA.NAME LIKE "%desktop.ini%" or FILEDATA.NAME LIKE "%(copy%" or FILEDATA.NAME LIKE "%ZbThumbnail.info%" or FILEDATA.NAME LIKE "%lorem%" or FILEDATA.NAME LIKE "%New Microsoft Word Document%" or FILEDATA.NAME LIKE "%Bin.dat%" or FILEDATA.NAME LIKE "%Thumbs.db%" or FILEDATA.NAME LIKE "% vitae%" or FILEDATA.NAME LIKE "% Appointments%" or FILEDATA.NAME LIKE "% CV%" or FILEDATA.NAME LIKE "% Application%" or FILEDATA.NAME LIKE "% Resume%" or FILEDATA.NAME LIKE "% Appointment%" or FILEDATA.NAME LIKE "% Test%" or FILEDATA.NAME LIKE "% list%" or FILEDATA.NAME LIKE "% member%" or FILEDATA.NAME LIKE "% people%" or FILEDATA.NAME LIKE "% address%" or FILEDATA.NAME LIKE "% phone%") | |
Query took: 0.00013828277587890625 seconds | |
--- | |
/* denylist: select file paths and names from the database for folder names in a given list | |
*/ | |
SELECT DISTINCT FILEDATA.FILE_PATH, FILEDATA.NAME | |
FROM FILEDATA | |
WHERE FILEDATA.TYPE = 'Folder' | |
AND (FILEDATA.NAME LIKE "%Untitled Folder%" or FILEDATA.NAME LIKE "%New Folder%" or FILEDATA.NAME LIKE "%(copy%" or FILEDATA.NAME LIKE "%.git%" or FILEDATA.NAME LIKE "%lorem%") | |
Query took: 0.0001385211944580078 seconds | |
--- | |
/* demystify: count the number of zero-byte files in the database | |
*/ | |
SELECT COUNT(FILEDATA.SIZE) FROM FILEDATA WHERE (FILEDATA.TYPE!='Folder') AND (FILEDATA.SIZE='0') | |
Query took: 0.00010204315185546875 seconds | |
--- | |
/* demystify: list the paths of the zero-byte files in the database | |
*/ | |
SELECT FILEDATA.FILE_PATH FROM FILEDATA WHERE FILEDATA.TYPE!='Folder' AND FILEDATA.SIZE='0' | |
Query took: 4.76837158203125e-05 seconds | |
--- | |
/* demystify: create a frequency listing of duplicate files in the database | |
*/ | |
SELECT FILEDATA.HASH, COUNT(*) AS TOTAL | |
FROM FILEDATA | |
WHERE FILEDATA.TYPE='File' OR FILEDATA.TYPE='Container' | |
AND FILEDATA.HASH != '' | |
GROUP BY FILEDATA.HASH | |
HAVING TOTAL > 1 | |
ORDER BY TOTAL DESC | |
Query took: 0.00033855438232421875 seconds | |
--- | |
/* demystify: select paths from the database for files with a given checksum | |
*/ | |
SELECT FILE_PATH FROM FILEDATA WHERE FILEDATA.HASH='da39a3ee5e6b4b0d3255bfef95601890afd80709' ORDER BY FILEDATA.FILE_PATH; | |
Query took: 7.82012939453125e-05 seconds | |
--- | |
/* demystify: select paths from the database for files with a given checksum | |
*/ | |
SELECT FILE_PATH FROM FILEDATA WHERE FILEDATA.HASH='3611abfafc52dfd56482101f7d26858a451a4f58' ORDER BY FILEDATA.FILE_PATH; | |
Query took: 4.5299530029296875e-05 seconds | |
--- | |
/* demystify: select paths from the database for files with a given checksum | |
*/ | |
SELECT FILE_PATH FROM FILEDATA WHERE FILEDATA.HASH='e12dbf403f935a77ff0b045d97767788c62ff24d' ORDER BY FILEDATA.FILE_PATH; | |
Query took: 4.00543212890625e-05 seconds | |
--- | |
/* rogues: select all non-folder file paths from the filedata table | |
*/ | |
SELECT DISTINCT FILE_PATH FROM FILEDATA WHERE FILEDATA.TYPE != 'Folder' | |
Query took: 4.076957702636719e-05 seconds | |
--- | |
/* rogues: select all 'folder' file paths from the filedata table | |
*/ | |
SELECT DISTINCT FILE_PATH FROM FILEDATA WHERE FILEDATA.TYPE = 'Folder' | |
Query took: 7.510185241699219e-05 seconds | |
--- | |
/* rogues: select files from the database that have extension mismatches | |
*/ | |
SELECT DISTINCT FILEDATA.FILE_PATH | |
FROM IDRESULTS | |
JOIN FILEDATA on IDRESULTS.FILE_ID = FILEDATA.FILE_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
AND (IDDATA.EXTENSION_MISMATCH='True') | |
Query took: 7.43865966796875e-05 seconds | |
--- | |
/* rogues: select files identified using signatures or containers using PRONOM | |
*/ | |
SELECT DISTINCT FILEDATA.FILE_PATH | |
FROM IDRESULTS | |
JOIN FILEDATA on IDRESULTS.FILE_ID = FILEDATA.FILE_ID | |
JOIN IDDATA on IDRESULTS.ID_ID = IDDATA.ID_ID | |
WHERE (IDDATA.METHOD != 'Container' AND IDDATA.METHOD != 'Signature') | |
AND FILEDATA.TYPE != 'Folder' | |
AND IDDATA.NS_ID=1 | |
Query took: 8.606910705566406e-05 seconds | |
--- | |
/* rogues: select file paths from the database where the name is in a given list | |
*/ | |
SELECT DISTINCT FILEDATA.FILE_PATH FROM FILEDATA WHERE NAME IN ("[","[]","]","£","¬") | |
Query took: 6.270408630371094e-05 seconds |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment