Skip to content

Instantly share code, notes, and snippets.

@mzfr

mzfr/DB-details.md

Last active Aug 24, 2020
Embed
What would you like to do?

What data do we store?

Currently the data that is stored in redis is in 3 category::

  1. The snare data which looks like:

    Code

    {
        "version": 1,
        "response": {
            "message": {
                "sess_uuid": "ba800b9528dd4a78b279940781ce9513",
                "peer_ip": "196.207.97.20",
                "peer_port": 36864,
                "location": {
                    "country": "India",
                    "country_code": "IN",
                    "city": "Delhi",
                    "zip_code": "110092"
                },
                "user_agent": "Mozilla/5.0 (X11; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0",
                "snare_uuid": "9f7d7dd3-ac6b-468b-8cee-ce3e352eff6e",
                "start_time": 1589192688.001088,
                "end_time": 1589192688.7164452,
                "requests_in_second": 6.989514155656564,
                "approx_time_between_requests": 0.14308667182922363,
                "accepted_paths": 5,
                "errors": 0,
                "hidden_links": 0,
                "attack_types": [
                    "index",
                    "index",
                    "index",
                    "index",
                    "index"
                ],
                "attack_count": {
                    "index": 5
                },
                "paths": [
                    {
                        "path": "/sites/default/files/css/css_z-dr4virJu_BxGOzYkqzNig8eQwU5gcOAg9Q_9HnNcc.css",
                        "timestamp": 1589192688.0010145,
                        "response_status": 200,
                        "attack_type": "index"
                    },
                    {
                        "path": "/sites/default/files/css/css_Dv7p9v7082dsQRGeMesk8NEyA_OGz6vCwSwMLvHUvc8.css",
                        "timestamp": 1589192688.0819743,
                        "response_status": 200,
                        "attack_type": "index"
                    },
                    {
                        "path": "/sites/default/files/js/js_I5vHqOSLMMkFHgYx-DDHQhxnTdw0bIxEMteli5ellcY.js",
                        "timestamp": 1589192688.089108,
                        "response_status": 200,
                        "attack_type": "index"
                    },
                    {
                        "path": "/sites/default/files/css/css_Z5jMg7P_bjcW9iUzujI7oaechMyxQTUqZhHJ_aYSq04.css",
                        "timestamp": 1589192688.090624,
                        "response_status": 200,
                        "attack_type": "index"
                    },
                    {
                        "path": "/core/misc/favicon.ico",
                        "timestamp": 1589192688.7164478,
                        "response_status": 200,
                        "attack_type": "index"
                    }
                ],
                "cookies": {
                    "sess_uuid": "d96bfa6d-e7a4-4344-99ab-c39cc448208f"
                },
                "referer": "/",
                "possible_owners": {
                    "user": 1
                }
            }
        }
    }

    This was taken out using the tanner API.

  2. The session data which looks like:

    Code

    {
    "peer": {
        "ip": "84.56.155.136",
        "port": 48194
    },
    "user_agent": "Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36",
    "snare_uuid": "776edef4-ec5f-4fe0-8485-02cf9ff1f64d",
    "sess_uuid": "3d09dbb58465444684cbfb9b7d8a313f",
    "start_time": 1583352304.5080535,
    "end_time": 1583352304.508056,
    "count": 1,
    "paths": [
        {
        "path": "/",
        "timestamp": 1583352304.50796,
        "response_status": 200,
        "attack_type": "index"
        }
    ],
    "cookies": {
        "sess_uuid": null
    },
    "referer": null
    }

    And inside the redis DB it looks like:

    Code

    1) "{\"sess_uuid\": \"d96bfa6de7a4434499abc39cc448208f\", \"peer_ip\": \"196.207.97.20\", \"peer_port\": 36864, \"location\": {\"country\": \"India\", \"country_code\": \"IN\", \"city\": \"Delhi\", \"zip_code\": \"110092\"}, \"user_agent\": \"Mozilla/5.0 (X11; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0\", \"snare_uuid\": \"9f7d7dd3-ac6b-468b-8cee-ce3e352eff6e\", \"start_time\": 1589192687.8153875, \"end_time\": 1589192687.8153908, \"requests_in_second\": 299593.14285714284, \"approx_time_between_requests\": 0.0, \"accepted_paths\": 1, \"errors\": 0, \"hidden_links\": 0, \"attack_types\": [\"index\"], \"attack_count\": {\"index\": 1}, \"paths\": [{\"path\": \"/\", \"timestamp\": 1589192687.8153307, \"response_status\": 200, \"attack_type\": \"index\"}], \"cookies\": {\"sess_uuid\": null}, \"referer\": null, \"possible_owners\": {\"user\": 0.15, \"tool\": 0.85, \"crawler\": 0.5}}"
    2) "1589192687.8153875"
    3) "{\"sess_uuid\": \"ba800b9528dd4a78b279940781ce9513\", \"peer_ip\": \"196.207.97.20\", \"peer_port\": 36864, \"location\": {\"country\": \"India\", \"country_code\": \"IN\", \"city\": \"Delhi\", \"zip_code\": \"110092\"}, \"user_agent\": \"Mozilla/5.0 (X11; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0\", \"snare_uuid\": \"9f7d7dd3-ac6b-468b-8cee-ce3e352eff6e\", \"start_time\": 1589192688.001088, \"end_time\": 1589192688.7164452, \"requests_in_second\": 6.989514155656564, \"approx_time_between_requests\": 0.14308667182922363, \"accepted_paths\": 5, \"errors\": 0, \"hidden_links\": 0, \"attack_types\": [\"index\", \"index\", \"index\", \"index\", \"index\"], \"attack_count\": {\"index\": 5}, \"paths\": [{\"path\": \"/sites/default/files/css/css_z-dr4virJu_BxGOzYkqzNig8eQwU5gcOAg9Q_9HnNcc.css\", \"timestamp\": 1589192688.0010145, \"response_status\": 200, \"attack_type\": \"index\"}, {\"path\": \"/sites/default/files/css/css_Dv7p9v7082dsQRGeMesk8NEyA_OGz6vCwSwMLvHUvc8.css\", \"timestamp\": 1589192688.0819743, \"response_status\": 200, \"attack_type\": \"index\"}, {\"path\": \"/sites/default/files/js/js_I5vHqOSLMMkFHgYx-DDHQhxnTdw0bIxEMteli5ellcY.js\", \"timestamp\": 1589192688.089108, \"response_status\": 200, \"attack_type\": \"index\"}, {\"path\": \"/sites/default/files/css/css_Z5jMg7P_bjcW9iUzujI7oaechMyxQTUqZhHJ_aYSq04.css\", \"timestamp\": 1589192688.090624, \"response_status\": 200, \"attack_type\": \"index\"}, {\"path\": \"/core/misc/favicon.ico\", \"timestamp\": 1589192688.7164478, \"response_status\": 200, \"attack_type\": \"index\"}], \"cookies\": {\"sess_uuid\": \"d96bfa6d-e7a4-4344-99ab-c39cc448208f\"}, \"referer\": \"/\", \"possible_owners\": {\"user\": 1.0}}"
    4) "1589192688.0010879"
    5) "{\"sess_uuid\": \"83b63f7699464d3280c465fc04f75ea8\", \"peer_ip\": \"78.188.223.83\", \"peer_port\": 35981, \"location\": {\"country\": \"Turkey\", \"country_code\": \"TR\", \"city\": \"Istanbul\", \"zip_code\": null}, \"user_agent\": \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36\", \"snare_uuid\": \"9f7d7dd3-ac6b-468b-8cee-ce3e352eff6e\", \"start_time\": 1589197143.6221383, \"end_time\": 1589197143.6221387, \"requests_in_second\": 2097152.0, \"approx_time_between_reqsuests\": 0.0, \"accepted_paths\": 1, \"errors\": 0, \"hidden_links\": 0, \"attack_types\": [\"index\"], \"attack_count\": {\"index\": 1}, \"paths\": [{\"path\": \"/\", \"timestamp\": 1589197143.6221015, \"response_status\": 200, \"attack_type\": \"index\"}], \"cookies\": {\"sess_uuid\": null}, \"referer\": null, \"possible_owners\": {\"user\": 0.15, \"tool\": 0.85, \"crawler\": 0.5}}"
    6) "1589197143.6221383"
    

  3. The Dorks, which are present in the form of set after loading them from dork.pickle binary. Then they are added into redis DB using sadd function(which add multiple values for a single key). Then they can be viewed inside the redis using smembers <key>.

How to store the data in postgres?

Now this is the problem we are trying to solve. Now we are totally going to drop the idea of storing them as single dictionary which was implemented in the following PR. But I have 2 ways we can do this:

  1. Totally go with some kind of relational schema and don't use JSONB(dict) in any of data we store. A rough example for this is:

    Example

    CREATE TABLE IF NOT EXISTS "data" (
    "version" INT NULL,
    "response.message.sess_uuid" TEXT NULL,
    "response.message.peer_ip" TEXT NULL,
    "response.message.peer_port" INT NULL,
    "response.message.location.country" TEXT NULL,
    "response.message.location.country_code" TEXT NULL,
    "response.message.location.city" TEXT NULL,
    "response.message.location.zip_code" INT NULL,
    "response.message.user_agent" TEXT NULL,
    "response.message.snare_uuid" TEXT NULL,
    "response.message.start_time" FLOAT NULL,
    "response.message.end_time" FLOAT NULL,
    "response.message.requests_in_second" FLOAT NULL,
    "response.message.approx_time_between_requests" FLOAT NULL,
    "response.message.accepted_paths" INT NULL,
    "response.message.errors" INT NULL,
    "response.message.hidden_links" INT NULL,
    "response.message.attack_types" JSON NULL,
    "response.message.attack_count.index" INT NULL,
    "response.message.paths" JSON NULL,
    "response.message.cookies.sess_uuid" TEXT NULL,
    "response.message.referer" TEXT NULL,
    "response.message.possible_owners.user" INT NULL
    );

    This is just a very simple and rough example of how it could end up looking. And for such kind of table we will be having following query to insert the values.

    Query

    INSERT INTO data VALUES
    (1,'ba800b9528dd4a78b279940781ce9513','196.207.97.20',36864,'India','IN','Delhi',110092,'Mozilla/5.0 (X11; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0','9f7d7dd3-ac6b-468b-8cee-ce3e352eff6e',1589192688.001088,1589192688.7164452,6.989514155656564,0.14308667182922363,5,0,0,[\"index\"\,\"index\"\,\"index\"\,\"index\"\,\"index\"],5,[{\"attack_type\":\"index\"\,\"path\":\"/sites/default/files/css/css_z-dr4virJu_BxGOzYkqzNig8eQwU5gcOAg9Q_9HnNcc.css\"\,\"response_status\":200\,\"timestamp\":1589192688.0010145}\,{\"attack_type\":\"index\"\,\"path\":\"/sites/default/files/css/css_Dv7p9v7082dsQRGeMesk8NEyA_OGz6vCwSwMLvHUvc8.css\"\,\"response_status\":200\,\"timestamp\":1589192688.0819743}\,{\"attack_type\":\"index\"\,\"path\":\"/sites/default/files/js/js_I5vHqOSLMMkFHgYx-DDHQhxnTdw0bIxEMteli5ellcY.js\"\,\"response_status\":200\,\"timestamp\":1589192688.089108}\,{\"attack_type\":\"index\"\,\"path\":\"/sites/default/files/css/css_Z5jMg7P_bjcW9iUzujI7oaechMyxQTUqZhHJ_aYSq04.css\"\,\"response_status\":200\,\"timestamp\":1589192688.090624}\,{\"attack_type\":\"index\"\,\"path\":\"/core/misc/favicon.ico\"\,\"response_status\":200\,\"timestamp\":1589192688.7164478}],'d96bfa6d-e7a4-4344-99ab-c39cc448208f','/',1);

  2. Another option for us is that we use JSONB but not for the complete data. What we can do is that we can have certain fields as JSONB and others can be in simple columns format.

    Example

    We will define the schema like

    CREATE TABLE IF NOT EXISTS "data" (
        "version" INT NULL,
        "response.message.sess_uuid" TEXT NULL,
        "response.message.peer_ip" TEXT NULL,
        "response.message.peer_port" INT NULL,
        "response.message.location" JSONB ,
        ......
    )

    Now using this method it could help us to come up with complex schema. For this kind of methodology we will have to decide what all columns to define out and what all to make JSONB.

P.S: In second approach I read in quite a few articles that if we have JSON data it's better to store in JSON format but also that the problem with dumping the complete dict in one single column is that if we have to change a single KEY VALUE then we'll have to change the whole dict(I haven't tested this myself and will be writing small scripts to check whether this is true or not). Since all our data is in JSON that is why I recommended the second way but in improved manner i.e to not dump the complete data in a single column but to have various columns some having types like VAR, INT and some of them having JSONB.

P.P.S: As I was going through the code I noticed that we use functions to validate the data and I thought that since our data is in JSON we can define JSON schemas and then use those for validation. This is just a very raw idea that popped in my head :)

@mzfr

This comment has been minimized.

Copy link
Owner Author

@mzfr mzfr commented May 15, 2020

CREATE TABLE IF NOT EXISTS "snare_data" {
    "version" INT NOT NULL,
    "sess_uuid" TEXT NOT NULL PRIMARY KEY,
    "message" TEXT REFERENCES session_data(sess_uuid)

}
CREATE TABLE IF NOT EXISTS "session_data" {
    "sess_uuid" TEXT NOT NULL PRIMARY KEY,
    "snare_uuid" TEXT NOT NULL,
    "start_time" FLOAT NOT NULL,
    "end_time" FLOAT NOT NULL,
    "user_agent" TEXT NOT NULL,
    "count" INT,
}

NOTE: This is not needed anymore since we are following some other path now

@mzfr

This comment has been minimized.

Copy link
Owner Author

@mzfr mzfr commented May 18, 2020

CREATE TABLE IF NOT EXISTS "Paths" (
    "sess_uuid" TEXT NOT NULL,
    "path" TEXT NOT NULL,
    "timestamp" FLOAT NOT NULL,
    "response_status" INT NOT NULL,
    "attack_type" TEXT NOT NULL
)
CREATE TABLE IF NOT EXISTS "Cookies" (
    "sess_uuid" TEXT NOT NULL,
    "key" TEXT NULL,
    "value" TEXT NULL
)
CREATE TABLE IF NOT EXISTS "session_data" (
    "sess_uuid" TEXT NULL PRIMARY KEY,
    "snare_uuid" TEXT NOT NULL,
    "start_time" FLOAT NOT NULL,
    "peer.ip" TEXT NOT NULL,
    "peer.port" INT NOT NULL,
    "user_agent" TEXT NOT NULL,
    "end_time" FLOAT NOT NULL,
    "count" INT NOT NULL,
    "paths" TEXT REFERENCES Paths(sess_uuid),
    "cookies" TEXT REFERENCES Cookies(sess_uuid)
    "referer" TEXT NULL
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.