Skip to content

Instantly share code, notes, and snippets.

@Richardn2002
Last active September 13, 2023 02:05
Show Gist options
  • Save Richardn2002/1bce5d24812f31389f5584d15823e46f to your computer and use it in GitHub Desktop.
Save Richardn2002/1bce5d24812f31389f5584d15823e46f to your computer and use it in GitHub Desktop.
Zhihu chat history logger.
const fs = require('fs');

// Account-specific values (names, cookie, user agent, ids) are read from secret.json
// in the working directory so they stay out of the source.
const secret = JSON.parse(fs.readFileSync('secret.json'));

const IMAGE_PATH = './out/images';
const STICKER_PATH = './out/stickers';
// Text log and raw JSON log paths; assigned once the id naming the current run is known.
let historyPath;
let rawHistoryPath;
const STATUS_PATH = './status.json';

// Recursive mkdir creates missing parents and does nothing when the directory already
// exists, so no exists-check is needed and neither directory depends on the other
// having been created first.
fs.mkdirSync(IMAGE_PATH, {recursive: true});
fs.mkdirSync(STICKER_PATH, {recursive: true});

const SENDER_NAME = secret.SENDER_NAME;
const RECEIVER_NAME = secret.RECEIVER_NAME;
// Sent as the x-zse-93 header and used as the first component of the signed string.
const X_ZSE_93 = '101_3_2.0';
// Last component of the signed string (see the x-zse-96 header construction).
const DC_0 = secret.DC_0;
//const X_ZST_81 = secret.X_ZST_81;
const USER_AGENT = secret.USER_AGENT;
const COOKIE = secret.COOKIE;
const SENDER_ID = secret.SENDER_ID;
// Signing function exported by the companion module; produces the x-zse-96 header value.
const x_zse_96_2_0 = require('./x-zse-96-2.0-encrypt');
const url = require('url');
/**
 * One page of the chat history API, identified by the `after_id` cursor taken
 * from a pagination link.
 */
class ChatEntry {
    /**
     * @param {string} href - Pagination URL carrying an `after_id` query parameter.
     *     Absolute and host-relative URLs are both accepted.
     */
    constructor(href) {
        // WHATWG URL replaces the deprecated legacy url.parse(); the base only matters
        // for relative links and is ignored when href is already absolute.
        const cursor = new URL(href, 'https://www.zhihu.com').searchParams.get('after_id');
        // url.parse() reported a missing parameter as undefined, so keep that contract.
        this.after_id = cursor === null ? undefined : cursor;
    }

    /**
     * Builds the https.request() options for fetching this page.
     * The x-zse-96 header is the signature of "<x-zse-93>+<path>+<DC_0>".
     *
     * @returns {object} Options object (hostname, port, path, method, headers).
     */
    getRequestOptions() {
        const path = '/api/v4/chat?sender_id=' + SENDER_ID + '&after_id=' + this.after_id + '&limit=20';
        return {
            hostname: 'www.zhihu.com',
            port: 443,
            path: path,
            method: 'GET',
            headers: {
                'User-Agent': USER_AGENT,
                'x-zse-93': X_ZSE_93,
                'x-zse-96': x_zse_96_2_0([X_ZSE_93, path, DC_0].join('+')),
                'Cookie': COOKIE
            }
        };
    }
}
// Number of messages handed to processMessageArray() whose downloadContent() has not
// yet completed; continueGuard() only advances once this is back to 0.
let taskCounter = 0;
/**
 * A single chat message converted from the API payload into the fields that get logged.
 * content_type 0 is text, 1 is an image, 2 is a sticker; anything else is logged with
 * a placeholder text and has no downloadable content.
 */
class Message {
    /**
     * Returns the trailing file extension (dot included) of a content URL, or '' when
     * the URL has none. A query string or fragment is ignored so that
     * ".../a.jpg?source=x" still yields ".jpg" instead of throwing.
     *
     * @param {string} contentUrl
     * @returns {string}
     */
    static extensionOf(contentUrl) {
        const withoutQuery = String(contentUrl).split(/[?#]/)[0];
        const match = withoutQuery.match(/\.[0-9a-z]+$/i);
        return match ? match[0] : '';
    }

    /**
     * @param {object} message - One element of `data.messages` from the chat API.
     */
    constructor(message) {
        this.id = message.id;
        this.timestamp = message.created_time;
        this.type = message.content_type;
        // Shift by 8 hours before ISO formatting so the string reads as UTC+8 wall-clock time.
        this.dateString = (new Date(this.timestamp * 1000 + 8 * 3600 * 1000)).toISOString();
        this.speaker = message.user_type === 'receiver' ? RECEIVER_NAME : SENDER_NAME;
        if (message.is_canceled) {
            // Recalled messages carry no content; contentUrl stays undefined on purpose.
            this.text = '[Message recalled.]';
            return;
        }
        switch (this.type) {
            case 0:
                this.text = message.text;
                break;
            case 1:
                this.contentUrl = message.image.url;
                // Name the file after the message id, keeping the URL's extension.
                this.contentNaming = this.id + Message.extensionOf(this.contentUrl);
                this.text = '[Image: ' + this.contentNaming + ']';
                break;
            case 2:
                this.contentUrl = message.sticker.url;
                // Some stickers have no title; fall back to the sticker id.
                this.contentNaming = (message.sticker.title || message.sticker.sticker_id) + Message.extensionOf(this.contentUrl);
                this.text = '[Sticker: ' + this.contentNaming + ']';
                break;
            default:
                this.text = '[Unsupported message type: ' + this.type + ']';
        }
    }

    /**
     * Downloads the image/sticker (if any and not already on disk), then records
     * progress in the status file, decrements taskCounter and calls continueGuard().
     * Download failures are logged and still count as completed so the run can go on.
     */
    downloadContent() {
        const messageId = this.id;
        let finished = false;
        const finish = function() {
            // Several failure paths can fire for one request; count each message once.
            if (finished) {
                return;
            }
            finished = true;
            // min: a newer file may finish downloading after an older one.
            status.progress = Math.min(status.progress, messageId);
            fs.writeFile(STATUS_PATH, JSON.stringify(status), (err) => {
                if (err) {
                    console.log('\nError saving ' + STATUS_PATH + ': ' + err + '\n');
                }
                taskCounter --;
                continueGuard();
            });
        };

        // Text, recalled and unsupported messages have nothing to download.
        if (this.contentUrl === undefined) {
            finish();
            return;
        }

        const filePath = (this.type === 1 ? IMAGE_PATH : STICKER_PATH) + '/' + this.contentNaming;
        const contentUrl = this.contentUrl;
        if (fs.existsSync(filePath)) {
            finish();
            return;
        }

        const reportFailure = function(e) {
            console.log('\nError downloading ' + filePath + ': ' + e);
            console.log('Url: ' + contentUrl + '\n');
            finish();
        };

        https.request(contentUrl, function(response) {
            const chunks = [];
            response.on('data', function(chunk) {
                chunks.push(chunk);
            });
            response.on('end', function() {
                fs.writeFile(filePath, Buffer.concat(chunks), function(err) {
                    if (err) {
                        console.log('\nError saving ' + filePath + ': ' + err + '\n');
                    }
                    finish();
                });
            });
            response.on('error', reportFailure);
        }).on('error', reportFailure).end(); // request-level errors (DNS, refused, reset) would otherwise be unhandled
    }

    /**
     * @returns {string} "YYYY-MM-DD HH:MM:SS speaker:\ntext\n" for the text log.
     */
    toString() {
        return this.dateString.slice(0, 10) + ' ' + this.dateString.slice(-13, -5) + ' ' + this.speaker + ':\n' + this.text + '\n';
    }

    /**
     * @returns {object} Plain object with id, timestamp, type, text, contentNaming and contentUrl.
     */
    toObject() {
        return {id: this.id, timestamp: this.timestamp, type: this.type, text: this.text, contentNaming: this.contentNaming, contentUrl: this.contentUrl};
    }
}
// Load the saved logging status, or start from a blank one on the first run.
//   latest   - id of the newest message of the current/last run (also names its log files)
//   progress - smallest message id processed so far
//   isEnd    - whether the last run finished
let status = fs.existsSync(STATUS_PATH)
    ? JSON.parse(fs.readFileSync(STATUS_PATH))
    : {latest: 0, progress: Infinity, isEnd: false};

// Default: ask for the newest page, which serves as a fresh entry point.
let path = '/api/v4/chat?sender_id=' + SENDER_ID;
if (!(status.latest == 0 || status.isEnd)) {
    // An unfinished run exists: keep appending to its files (named after its newest
    // message id) and continue from the last recorded progress id.
    historyPath = './out/' + status.latest + '.txt';
    rawHistoryPath = './out/' + status.latest + '.rawlist';
    path += '&after_id=' + status.progress + '&limit=20';
}
const https = require('https');
const Stream = require('stream').Transform;
// Initiate history pull
// previousLatest: newest message id of the previous completed run; paging stops once
// progress reaches it. currentChunk: the most recently parsed API response.
let previousLatest;
let currentChunk;
https.request({
    hostname: 'www.zhihu.com',
    port: 443,
    path: path,
    method: 'GET',
    headers: {
        'User-Agent': USER_AGENT,
        'x-zse-93': X_ZSE_93,
        'x-zse-96': x_zse_96_2_0([X_ZSE_93, path, DC_0].join('+')),
        'Cookie': COOKIE,
    }
}, (res) => {
    let data = '';
    res.on('data', (d) => {
        data += d;
    });
    res.on('end', () => {
        try {
            currentChunk = JSON.parse(data.toString());
        } catch (e) {
            console.log('Error parsing chat history response (HTTP ' + res.statusCode + '): ' + e);
            return;
        }
        const messages = currentChunk.data && currentChunk.data.messages;
        if (!Array.isArray(messages) || messages.length === 0) {
            console.log('No messages in chat history response (HTTP ' + res.statusCode + ').');
            return;
        }
        if (status.latest == 0 || status.isEnd) {
            if (messages[0].id === status.latest) {
                // no new messages to log
                return;
            }
            previousLatest = status.latest;
            // Persist the stop point so an interrupted run can restore it when resumed.
            status.previousLatest = previousLatest;
            status.latest = messages[0].id;
            // Start a fresh run: the progress/isEnd left over from the previous run would
            // otherwise make continueGuard() stop after the first page.
            status.progress = Infinity;
            status.isEnd = false;
            historyPath = './out/' + status.latest + '.txt';
            rawHistoryPath = './out/' + status.latest + '.rawlist';
            processMessageArray(messages);
        } else {
            // Resuming: restore the stop point saved by the interrupted run. Status files
            // written before this field existed fall back to 0 (page until the API reports the end).
            previousLatest = status.previousLatest != null ? status.previousLatest : 0;
            processMessageArray(messages.slice(1));
            // exclude the duplicate head (specifically, the message with id after_id)
        }
    });
}).on('error', (e) => {
    console.log('Error requesting chat history: ' + e);
}).end();
/**
 * Fetches one page of chat history and hands its messages to processMessageArray().
 * On a network error or an unusable response the error is logged and paging stops;
 * status.isEnd is left untouched, so the next start takes the resume branch.
 *
 * @param {ChatEntry} chatEntry - Page to fetch.
 */
function processPage(chatEntry) {
    https.request(chatEntry.getRequestOptions(), (res) => {
        let data = '';
        res.on('data', (d) => {
            data += d;
        });
        res.on('end', () => {
            let chunk;
            try {
                chunk = JSON.parse(data.toString());
            } catch (e) {
                console.log('\nError parsing chat history page (HTTP ' + res.statusCode + '): ' + e);
                return;
            }
            if (!chunk.data || !Array.isArray(chunk.data.messages)) {
                console.log('\nUnexpected chat history page (HTTP ' + res.statusCode + '): ' + data);
                return;
            }
            // Only replace the shared chunk once it is known to be usable.
            currentChunk = chunk;
            processMessageArray(currentChunk.data.messages);
        });
    }).on('error', (e) => {
        console.log('\nError requesting chat history page: ' + e);
    }).end();
}
/**
 * Logs every message of a page: starts its content download, appends the readable
 * form to the text log and the JSON form to the raw log, and prints a progress line.
 *
 * @param {object[]} array - Raw message objects from the API, in the order received.
 */
function processMessageArray(array) {
    if (array.length === 0) {
        // No download will complete and call continueGuard() for us, so call it directly;
        // otherwise the run would stall without paging on or recording the end.
        continueGuard();
        return;
    }
    // Register all messages up front so continueGuard() cannot fire between two of them.
    taskCounter += array.length;
    array.forEach((message) => {
        let messageObject = new Message(message);
        messageObject.downloadContent();
        fs.appendFileSync(historyPath, messageObject.toString() + '\n');
        fs.appendFileSync(rawHistoryPath, JSON.stringify(messageObject) + ',');
        // '\r' keeps the progress indicator on a single console line.
        process.stdout.write(messageObject.dateString + '\r');
    });
}
/**
 * Called after each message finishes. Once no message of the current page is pending,
 * either requests the next page or marks the run as ended and saves the status.
 */
function continueGuard() {
    // Messages still pending: the last one to finish will call this again.
    if (taskCounter !== 0) {
        return;
    }

    let notice;
    if (!(status.progress > previousLatest)) {
        // Paged back to (or past) the newest message of the previous run.
        notice = 'Previous entry point reached.';
    } else if (currentChunk.paging.is_end) {
        notice = 'History end reached.';
    } else {
        processPage(new ChatEntry(currentChunk.paging.next));
        return;
    }

    status.isEnd = true;
    fs.writeFile(STATUS_PATH, JSON.stringify(status), () => {console.log(notice)});
}
@Richardn2002
Copy link
Author

x-zse-96-2.0-encrypt.js:

'use strict';
// A minimal DOM window. It is not referenced by name anywhere in the visible code;
// presumably the program run by the VM below reaches it through the direct eval() in
// C.prototype.e -- TODO confirm. Do not rename or remove.
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const window = new JSDOM(`<!DOCTYPE html><p></p>`).window;

// Base64 decoder returning a binary string (one char per byte); used by G.prototype.D.
const atob = (data) => Buffer.from(data, 'base64').toString('binary');
const crypto = require('crypto');
// Hex-encoded MD5 digest of the given string; used by the exported signing function.
const md5 = (string) => crypto.createHash('md5').update(string).digest("hex");

// typeof helper used by the VM's ALU (operation 10). On first use it replaces itself
// with one of two implementations and then delegates to it: a plain `typeof` when
// `typeof Symbol.A` is 'symbol', otherwise a variant that reports 'symbol' for values
// constructed by Symbol.
function t(e) {
    var hasNativeSymbolTypeof = 'function' == typeof Symbol && 'symbol' == typeof Symbol.A;
    if (hasNativeSymbolTypeof) {
        t = function(value) {
            return typeof value
        };
    } else {
        t = function(value) {
            var looksLikeSymbol = value && 'function' == typeof Symbol && value.constructor === Symbol && value !== Symbol.prototype;
            return looksLikeSymbol ? 'symbol' : typeof value
        };
    }
    return t(e)
}
// Holder object whose `_encrypt` member is called by the exported signing function.
// Nothing in the visible code assigns `_encrypt`; presumably the program executed by
// the VM installs it -- TODO confirm.
var __g = {};

// The constructors below each decode the operand bits of one numeric instruction word
// `e` into named fields. Which constructor is used is selected by G.prototype.e from
// bits 12-15 of the word via the G.J table; the matching `prototype.e` method executes
// the decoded instruction against a G instance.

// Opcode 0: no operands.
function s() {}

// Opcode 1: t = bit 11, s = bits 9-10, i and h = bits 0-8 (same value under two names).
function i(e) {
    this.t = (2048 & e) >> 11,
        this.s = (1536 & e) >> 9,
        this.i = 511 & e,
        this.h = 511 & e
}

// Opcode 2: s = bits 10-11, h = bits 0-9.
function h(e) {
    this.s = (3072 & e) >> 10,
        this.h = 1023 & e
}

// Opcode 3: a = bits 10-11, c = bits 8-9, n = bits 6-7, t = bits 0-5.
function a(e) {
    this.a = (3072 & e) >> 10,
        this.c = (768 & e) >> 8,
        this.n = (192 & e) >> 6,
        this.t = 63 & e
}

// Opcode 4: s = bits 10-11, i = bits 0-9.
function c(e) {
    this.s = e >> 10 & 3,
        this.i = 1023 & e
}

// Opcode 5: no operands.
function n() {}

// Opcode 6: same field layout as opcode 3 (a, c, n, t).
function e(e) {
    this.a = (3072 & e) >> 10,
        this.c = (768 & e) >> 8,
        this.n = (192 & e) >> 6,
        this.t = 63 & e
}

// Opcode 7: h = bits 2-11, t = bits 0-1.
function o(e) {
    this.h = (4095 & e) >> 2,
        this.t = 3 & e
}

// Opcode 8: s = bits 10-11, i = bits 2-9, t = bits 0-1.
function r(e) {
    this.s = e >> 10 & 3,
        this.i = e >> 2 & 255,
        this.t = 3 & e
}
// Execution handlers for opcodes 0-8. Each receives the VM state `e` (a G instance):
// e.r = four value slots, e.k = current local-variable array, e.f = operand stack,
// e.C = index of the next instruction word, e.u = condition flag.

// Opcode 0: clear the running flag, which ends the loop in G.prototype.v.
s.prototype.e = function(e) {
        e.o = !1
    },
    // Opcode 1: load slot r[s] with the immediate i (t = 0) or with local k[h] (t = 1).
    i.prototype.e = function(e) {
        switch (this.t) {
            case 0:
                e.r[this.s] = this.i;
                break;
            case 1:
                e.r[this.s] = e.k[this.h]
        }
    },
    // Opcode 2: store slot r[s] into local k[h].
    h.prototype.e = function(e) {
        e.k[this.h] = e.r[this.s]
    },
    // Opcode 3: r[a] = r[c] <op> r[n]; t selects the operator (10, 13 and 20 are unary
    // and read r[c] only).
    a.prototype.e = function(e) {
        switch (this.t) {
            case 0:
                e.r[this.a] = e.r[this.c] + e.r[this.n];
                break;
            case 1:
                e.r[this.a] = e.r[this.c] - e.r[this.n];
                break;
            case 2:
                e.r[this.a] = e.r[this.c] * e.r[this.n];
                break;
            case 3:
                e.r[this.a] = e.r[this.c] / e.r[this.n];
                break;
            case 4:
                e.r[this.a] = e.r[this.c] % e.r[this.n];
                break;
            case 5:
                e.r[this.a] = e.r[this.c] == e.r[this.n];
                break;
            case 6:
                e.r[this.a] = e.r[this.c] >= e.r[this.n];
                break;
            case 7:
                e.r[this.a] = e.r[this.c] || e.r[this.n];
                break;
            case 8:
                e.r[this.a] = e.r[this.c] && e.r[this.n];
                break;
            case 9:
                e.r[this.a] = e.r[this.c] !== e.r[this.n];
                break;
            case 10:
                e.r[this.a] = t(e.r[this.c]);
                break;
            case 11:
                e.r[this.a] = e.r[this.c] in e.r[this.n];
                break;
            case 12:
                e.r[this.a] = e.r[this.c] > e.r[this.n];
                break;
            case 13:
                e.r[this.a] = -e.r[this.c];
                break;
            case 14:
                e.r[this.a] = e.r[this.c] < e.r[this.n];
                break;
            case 15:
                e.r[this.a] = e.r[this.c] & e.r[this.n];
                break;
            case 16:
                e.r[this.a] = e.r[this.c] ^ e.r[this.n];
                break;
            case 17:
                e.r[this.a] = e.r[this.c] << e.r[this.n];
                break;
            case 18:
                e.r[this.a] = e.r[this.c] >>> e.r[this.n];
                break;
            case 19:
                e.r[this.a] = e.r[this.c] | e.r[this.n];
                break;
            case 20:
                e.r[this.a] = !e.r[this.c]
        }
    },
    // Opcode 4: enter a subroutine at address r[s]. Saves the return address (on Q), the
    // locals (on B) and the operand stack (on g); the new locals are the top i operand
    // stack entries in their original push order, and the operand stack starts empty.
    c.prototype.e = function(e) {
        e.Q.push(e.C),
            e.B.push(e.k),
            e.C = e.r[this.s],
            e.k = [];
        for (var t = 0; t < this.i; t++) e.k.unshift(e.f.pop());
        e.g.push(e.f),
            e.f = []
    },
    // Opcode 5: leave a subroutine by restoring the saved address, locals and operand stack.
    n.prototype.e = function(e) {
        e.C = e.Q.pop(),
            e.k = e.B.pop(),
            e.f = e.g.pop()
    },
    // Opcode 6: set the condition flag u from a comparison of r[a] and r[c]
    // (t = 6 / 7 test r[a] alone for truthiness / falsiness).
    e.prototype.e = function(e) {
        switch (this.t) {
            case 0:
                e.u = e.r[this.a] >= e.r[this.c];
                break;
            case 1:
                e.u = e.r[this.a] <= e.r[this.c];
                break;
            case 2:
                e.u = e.r[this.a] > e.r[this.c];
                break;
            case 3:
                e.u = e.r[this.a] < e.r[this.c];
                break;
            case 4:
                e.u = e.r[this.a] == e.r[this.c];
                break;
            case 5:
                e.u = e.r[this.a] != e.r[this.c];
                break;
            case 6:
                e.u = e.r[this.a];
                break;
            case 7:
                e.u = !e.r[this.a]
        }
    },
    // Opcode 7: jump to address h -- always (t = 0), if u is set (t = 1), if u is clear
    // (t = 2), or always while also clearing the handler address w (t = 3).
    // The condition flag is reset afterwards in every case.
    o.prototype.e = function(e) {
        switch (this.t) {
            case 0:
                e.C = this.h;
                break;
            case 1:
                e.u && (e.C = this.h);
                break;
            case 2:
                e.u || (e.C = this.h);
                break;
            case 3:
                e.C = this.h,
                    e.w = null
        }
        e.u = !1
    },
    // Opcode 8: invoke a host value with arguments popped from the operand stack and put
    // the result in r[3]. t = 0 calls r[s] as a function, t = 1 calls the method of r[s]
    // whose name is popped first, t = 2 constructs with `new`. i entries are popped but
    // only the first two are passed on.
    r.prototype.e = function(e) {
        switch (this.t) {
            case 0:
                for (var t = [], n = 0; n < this.i; n++) t.unshift(e.f.pop());
                e.r[3] = e.r[this.s](t[0], t[1]);
                break;
            case 1:
                for (var r = e.f.pop(), i = [], o = 0; o < this.i; o++) i.unshift(e.f.pop());
                e.r[3] = e.r[this.s][r](i[0], i[1]);
                break;
            case 2:
                for (var a = [], c = 0; c < this.i; c++) a.unshift(e.f.pop());
                e.r[3] = new e.r[this.s](a[0], a[1])
        }
    };
// Decodes one entry of the VM's string table. Every output character code is
// 24 XOR <input character code> XOR <previous output character code>, with 66 standing
// in for the "previous output" of the first character.
var k = function(e) {
    var previous = 66;
    return e.split('').map(function(ch) {
        previous = 24 ^ ch.charCodeAt(0) ^ previous;
        return String.fromCharCode(previous)
    }).join('')
};

// Operand decoders for opcodes 9-15 (see G.J below for the opcode mapping).

// Opcode 9: t = bits 10-11, s = bits 8-9, i = bits 0-9, h = bits 0-5.
function Q(e) {
    this.t = (4095 & e) >> 10,
        this.s = (1023 & e) >> 8,
        this.i = 1023 & e,
        this.h = 63 & e
}

// Opcode 10: t = bits 10-11, a = bits 8-9, c = bits 6-7.
function C(e) {
    this.t = (4095 & e) >> 10,
        this.a = (1023 & e) >> 8,
        this.c = (255 & e) >> 6
}

// Opcode 11: s = bits 10-11, h = bits 0-9.
function B(e) {
    this.s = (3072 & e) >> 10,
        this.h = 1023 & e
}

// Opcode 12: h = bits 0-11.
function f(e) {
    this.h = 4095 & e
}

// Opcode 13: s = bits 10-11.
function g(e) {
    this.s = (3072 & e) >> 10
}

// Opcode 14: h = bits 0-11.
function u(e) {
    this.h = 4095 & e
}

// Opcode 15: t = bits 8-11, s = bits 6-7, i = bits 0-5.
function w(e) {
    this.t = (3840 & e) >> 8,
        this.s = (192 & e) >> 6,
        this.i = 63 & e
}

// VM state. Fields as used by the handlers and by G.prototype.D / v / e:
//   r - four value slots            C - index of the next instruction word in G
//   Q - saved C values              k - current local-variable array
//   B - saved k arrays              f - operand stack
//   g - saved operand stacks        u - condition flag
//   G - instruction words           b - encoded string table (decoded with k())
//   o - running flag                w - address jumped to when an instruction throws
//   U - last caught exception       F - passed along by opcode 14 but not read by v()
//   R - time the previous instruction started
//   J - opcode number -> operand decoder constructor
function G() {
    this.r = [
            0,
            0,
            0,
            0
        ],
        this.C = 0,
        this.Q = [],
        this.k = [],
        this.B = [],
        this.f = [],
        this.g = [],
        this.u = !1,
        this.G = [],
        this.b = [],
        this.o = !1,
        this.w = null,
        this.U = null,
        this.F = [],
        this.R = 0,
        this.J = {
            0: s,
            1: i,
            2: h,
            3: a,
            4: c,
            5: n,
            6: e,
            7: o,
            8: r,
            9: Q,
            10: C,
            11: B,
            12: f,
            13: g,
            14: u,
            15: w
        }
}
// Execution handlers for opcodes 9-15, followed by the VM's loader and run loop.

// Opcode 9: push onto the operand stack -- slot r[s] (t = 0), the immediate i (t = 1),
// local k[h] (t = 2) or the decoded string-table entry b[h] (t = 3).
Q.prototype.e = function(e) {
        switch (this.t) {
            case 0:
                e.f.push(e.r[this.s]);
                break;
            case 1:
                e.f.push(this.i);
                break;
            case 2:
                e.f.push(e.k[this.h]);
                break;
            case 3:
                e.f.push(k(e.b[this.h]))
        }
    },
    // Opcode 10: property read r[a] = r[c][key] (t = 0), property write r[c][key] = value
    // with the key popped before the value (t = 1), or r[a] = eval(<popped string>) (t = 2).
    // NOTE(review): this is a direct eval, so the evaluated text can see this function's
    // locals and the module-level names; renaming identifiers in this file may change
    // what the embedded program resolves. The evaluated strings are not visible here.
    C.prototype.e = function(A) {
        switch (this.t) {
            case 0:
                var t = A.f.pop();
                A.r[this.a] = A.r[this.c][t];
                break;
            case 1:
                var s = A.f.pop(),
                    i = A.f.pop();
                A.r[this.c][s] = i;
                break;
            case 2:
                var h = A.f.pop();
                A.r[this.a] = eval(h)
        }
    },
    // Opcode 11: load slot r[s] with the decoded string-table entry b[h].
    B.prototype.e = function(e) {
        e.r[this.s] = k(e.b[this.h])
    },
    // Opcode 12: set the address the run loop jumps to when an instruction throws.
    f.prototype.e = function(e) {
        e.w = this.h
    },
    // Opcode 13: throw the value in slot r[s].
    g.prototype.e = function(e) {
        throw e.r[this.s]
    },
    // Opcode 14: put a host-callable function in r[3]. When called, it runs a fresh G
    // over the same instruction words and string table starting at address h; its locals
    // are the call's first argument followed by a copy of the creating frame's locals
    // (the copy is taken once and shared by every call). It returns the new VM's r[3].
    u.prototype.e = function(e) {
        var t = this,
            n = [
                0
            ];
        e.k.forEach((function(e) {
            n.push(e)
        }));
        var r = function(r) {
            var i = new G;
            return i.k = n,
                i.k[0] = r,
                i.v(e.G, t.h, e.b, e.F),
                i.r[3]
        };
        // toString is overridden to report the text below instead of the real source.
        r.toString = function() {
                return '() { [native code] }'
            },
            e.r[3] = r
    },
    // Opcode 15: build an object from i value/key pairs (t = 0, value popped before its
    // key) or an array of i entries in push order (t = 1) and store it in r[s].
    w.prototype.e = function(e) {
        switch (this.t) {
            case 0:
                for (var t = {}, n = 0; n < this.i; n++) {
                    var r = e.f.pop();
                    t[e.f.pop()] = r
                }
                e.r[this.s] = t;
                break;
            case 1:
                for (var i = [], o = 0; o < this.i; o++) i.unshift(e.f.pop());
                e.r[this.s] = i
        }
    },
    // Loads a base64 program. Layout after decoding: a 2-byte big-endian byte count n,
    // then n bytes of 2-byte big-endian instruction words (-> this.G), then a sequence
    // of strings each preceded by its 2-byte big-endian length (-> this.b, still encoded).
    G.prototype.D = function(e) {
        for (var t = atob(e), n = t.charCodeAt(0) << 8 | t.charCodeAt(1), r = [], i = 2; i < n + 2; i += 2) r.push(t.charCodeAt(i) << 8 | t.charCodeAt(i + 1));
        this.G = r;
        for (var o = [], a = n + 2; a < t.length;) {
            var c = t.charCodeAt(a) << 8 | t.charCodeAt(a + 1),
                u = t.slice(a + 2, a + 2 + c);
            o.push(u),
                a += c + 2
        }
        this.b = o
    },
    // Run loop. `e` is either a base64 program string (loaded via D) or an array of
    // instruction words used together with string table `n`; `t` is the start address
    // (default 0). Runs until the running flag is cleared or a non-number word is read.
    // It returns early if more than 500 ms passed since the previous instruction started
    // (presumably a guard against being paused in a debugger -- TODO confirm).
    // An exception thrown by an instruction is stored in U and, if a handler address w is
    // set, execution continues there.
    G.prototype.v = function(e, t, n) {
        for (t = t || 0, n = n || [], this.C = t, 'string' == typeof e ? this.D(e) : (this.G = e, this.b = n), this.o = !0, this.R = Date.now(); this.o;) {
            var r = this.G[this.C++];
            if ('number' != typeof r) break;
            var i = Date.now();
            if (500 < i - this.R) return;
            this.R = i;
            try {
                this.e(r)
            } catch (e) {
                this.U = e,
                    this.w && (this.C = this.w)
            }
        }
    },
    // Executes one instruction word: bits 12-15 select the decoder from J, which is
    // constructed with the whole word and then run against this VM.
    G.prototype.e = function(e) {
        var t = (61440 & e) >> 12;
        new this.J[t](e).e(this)
    },
    // The statement ends on the next line by running the embedded base64 program on a
    // fresh G at module load. Presumably that program is what installs __g._encrypt --
    // TODO confirm.
    (new G).v('AxjgB5MAnACoAJwBpAAAABAAIAKcAqgAMAq0AzRJZAZwUpwCqACQACACGAKcBKAAIAOcBagAIAQYAjAUGgKcBqFAuAc5hTSHZAZwqrAIGgA0QJEAJAAYAzAUGgOcCaFANRQ0R2QGcOKwChoANECRACQAsAuQABgDnAmgAJwMgAGcDYwFEAAzBmAGcSqwDhoANECRACQAGAKcD6AAGgKcEKFANEcYApwRoAAxB2AGcXKwEhoANECRACQAGAKcE6AAGgKcFKFANEdkBnGqsBUaADRAkQAkABgCnBagAGAGcdKwFxoANECRACQAGAKcGKAAYAZx+rAZGgA0QJEAJAAYA5waoABgBnIisBsaADRAkQAkABgCnBygABoCnB2hQDRHZAZyWrAeGgA0QJEAJAAYBJwfoAAwFGAGcoawIBoANECRACQAGAOQALAJkAAYBJwfgAlsBnK+sCEaADRAkQAkABgDkACwGpAAGAScH4AJbAZy9rAiGgA0QJEAJACwI5AAGAScH6AAkACcJKgAnCWgAJwmoACcJ4AFnA2MBRAAMw5gBnNasCgaADRAkQAkABgBEio0R5EAJAGwKSAFGACcKqAAEgM0RCQGGAYSATRFZAZzshgAtCs0QCQAGAYSAjRFZAZz1hgAtCw0QCQAEAAgB7AtIAgYAJwqoAASATRBJAkYCRIANEZkBnYqEAgaBxQBOYAoBxQEOYQ0giQKGAmQABgAnC6ABRgBGgo0UhD/MQ8zECALEAgaBxQBOYAoBxQEOYQ0gpEAJAoYARoKNFIQ/zEPkAAgChgLGgkUATmBkgAaAJwuhAUaCjdQFAg5kTSTJAsQCBoHFAE5gCgHFAQ5hDSCkQAkChgBGgo0UhD/MQ+QACAKGAsaCRQCOYGSABoAnC6EBRoKN1AUEDmRNJMkCxgFGgsUPzmPkgAaCJwvhAU0wCQFGAUaCxQGOZISPzZPkQAaCJwvhAU0wCQFGAUaCxQMOZISPzZPkQAaCJwvhAU0wCQFGAUaCxQSOZISPzZPkQAaCJwvhAU0wCQFGAkSAzRBJAlz/B4FUAAAAwUYIAAIBSITFQkTERwABi0GHxITAAAJLwMSGRsXHxMZAAk0Fw8HFh4NAwUABhU1EBceDwAENBcUEAAGNBkTGRcBAAFKAAkvHg4PKz4aEwIAAUsACDIVHB0QEQ4YAAsuAzs7AAoPKToKDgAHMx8SGQUvMQABSAALORoVGCQgERcCAxoACAU3ABEXAgMaAAsFGDcAERcCAxoUCgABSQAGOA8LGBsPAAYYLwsYGw8AAU4ABD8QHAUAAU8ABSkbCQ4BAAFMAAktCh8eDgMHCw8AAU0ADT4TGjQsGQMaFA0FHhkAFz4TGjQsGQMaFA0FHhk1NBkCHgUbGBEPAAFCABg9GgkjIAEmOgUHDQ8eFSU5DggJAwEcAwUAAUMAAUAAAUEADQEtFw0FBwtdWxQTGSAACBwrAxUPBR4ZAAkqGgUDAwMVEQ0ACC4DJD8eAx8RAAQ5GhUYAAFGAAAABjYRExELBAACWhgAAVoAQAg/PTw0NxcQPCQ5C3JZEBs9fkcnDRcUAXZia0Q4EhQgXHojMBY3MWVCNT0uDhMXcGQ7AUFPHigkQUwQFkhaAkEACjkTEQspNBMZPC0ABjkTEQsrLQ==');
    
// Exported signing function: MD5-hashes the input (hex), URI-encodes the digest, passes
// it to __g._encrypt and prefixes the result with "2.0_".
var b = function(e) {
    var digest = md5(e);
    var encoded = encodeURIComponent(digest);
    return "2.0_" + __g._encrypt(encoded);
};

module.exports = b;

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment