lihnux/StringToByteArray.js

Created August 6, 2014 06:05

Star () You must be signed in to star a gist
Fork () You must be signed in to fork a gist

Learn more about clone URLs
Clone this repository at <script src="https://gist.github.com/lihnux/2aa4a6f5a9170974f6aa.js"></script>
Save lihnux/2aa4a6f5a9170974f6aa to your computer and use it in GitHub Desktop.

Download ZIP

Javascript Convert String to Byte Array

Raw

StringToByteArray.js

	// Collect the UTF-16 code unit of every position in `url`.
	// NOTE: charCodeAt() yields char codes in the range 0-65535, not bytes; the
	// values only coincide with byte values while the text stays within
	// ASCII/Latin-1 (as it does for "Hello World").
	const url = "Hello World";
	// Index-based mapping (rather than iterating the string) keeps one entry per
	// UTF-16 code unit and avoids leaking a loop counter into the outer scope.
	const data = Array.from({ length: url.length }, (_, index) => url.charCodeAt(index));

techird commented Jul 31, 2018

This is not a byte array; it is a list of char codes.

techird commented Jul 31, 2018 •

edited

Loading

/**
 * Splits every UTF-16 code unit of a string into two bytes, high byte first.
 *
 * @param {string} str - Text to unpack.
 * @returns {number[]} Two byte values (0-255) per code unit, in string order.
 */
function unpack(str) {
    const bytes = [];
    for (let index = 0; index < str.length; index += 1) {
        const codeUnit = str.charCodeAt(index);
        // Upper 8 bits first, then the lower 8 bits (big-endian order).
        bytes.push(codeUnit >>> 8, codeUnit & 0xFF);
    }
    return bytes;
}

untilbit commented Jul 31, 2018

@techird Does your code produce a char code list or a byte array?

dinigo commented Oct 29, 2018

I think this might also work

[...Buffer.from('hello world')]

Insidexa commented Nov 19, 2018

@techird it's only for Latin characters.
Use this too (the code is not mine):

/**
 * Encodes a JavaScript (UTF-16) string as an array of UTF-8 byte values.
 *
 * Valid surrogate pairs are combined into a single code point and emitted as
 * a 4-byte sequence. An unpaired surrogate (a high surrogate not followed by
 * a low one, or a low surrogate on its own) is encoded as U+FFFD
 * (0xEF 0xBF 0xBD) so the output is always well-formed UTF-8 and the
 * character after the stray surrogate is not swallowed.
 *
 * @param {string} str - Text to encode.
 * @returns {number[]} UTF-8 bytes (each 0-255) in order.
 */
function toUTF8Array(str) {
    const utf8 = [];
    for (let i = 0; i < str.length; i++) {
        let charcode = str.charCodeAt(i);
        if (charcode < 0x80) {
            // 1 byte: ASCII.
            utf8.push(charcode);
        } else if (charcode < 0x800) {
            // 2 bytes: 110xxxxx 10xxxxxx
            utf8.push(0xc0 | (charcode >> 6),
                      0x80 | (charcode & 0x3f));
        } else if (charcode < 0xd800 || charcode >= 0xe000) {
            // 3 bytes: rest of the BMP outside the surrogate range.
            utf8.push(0xe0 | (charcode >> 12),
                      0x80 | ((charcode >> 6) & 0x3f),
                      0x80 | (charcode & 0x3f));
        } else {
            // Surrogate range (0xD800-0xDFFF): only a high surrogate that is
            // immediately followed by a low surrogate forms a valid pair.
            const isHighSurrogate = charcode < 0xdc00;
            const next = i + 1 < str.length ? str.charCodeAt(i + 1) : 0;
            const isValidPair = isHighSurrogate && next >= 0xdc00 && next < 0xe000;
            if (!isValidPair) {
                // Unpaired surrogate: emit U+FFFD and do NOT consume the
                // following code unit.
                utf8.push(0xef, 0xbf, 0xbd);
                continue;
            }
            i++;
            // UTF-16 encodes 0x10000-0x10FFFF by subtracting 0x10000 and
            // splitting the 20 bits of 0x0-0xFFFFF into two halves.
            charcode = 0x10000 + (((charcode & 0x3ff) << 10) | (next & 0x3ff));
            utf8.push(0xf0 | (charcode >> 18),
                      0x80 | ((charcode >> 12) & 0x3f),
                      0x80 | ((charcode >> 6) & 0x3f),
                      0x80 | (charcode & 0x3f));
        }
    }
    return utf8;
}

Insidexa commented Nov 19, 2018

@dinigo yes, works, equal to java str.getBytes(Charsets.UTF_8)

icecraft commented Dec 3, 2019

@Insidexa Good job ！

Gh0u1L5 commented Sep 21, 2020

@Insidexa IMO, this code converts the char codes from UTF-16 to UTF-8, because the default internal encoding of JS strings is UTF-16. However, if all I need is a UTF-16 byte array, I don't need so many complex checks and bit operations.

eduardoroliveira commented Jan 28, 2021

also Array.from("111122222333344444555")

eduardoroliveira commented Jan 28, 2021

if you want to convert to array of numbers you could use

Array.from("1111222223333444445556", (x) => Number(x))

ellcs commented Mar 14, 2021

if you want to convert to array of numbers you could use
Array.from("1111222223333444445556", (x) => Number(x))

Array.from("\x00", (x) => Number(x))

results in [NaN]!

If you handle raw bytes in the 0..255 range, it is better to use a slightly different version based on charCodeAt. I didn't test how it behaves with Unicode characters.

// The mapper is Array.from's second argument; it returns each character's char code.
Array.from("1111222223333444445556", (x) => x.charCodeAt(0))

paolobertani commented Aug 26, 2022

/**
 * Returns the raw bytes of a string's UTF-16 code units.
 *
 * Each code unit contributes its high byte followed by its low byte, so the
 * result holds two entries per code unit.
 *
 * @param {string} str - Source string.
 * @returns {number[]} Byte values in the range 0-255.
 */
function unpack(str) {
    const bytes = [];
    let position = 0;
    while (position < str.length) {
        const unit = str.charCodeAt(position);
        const highByte = unit >>> 8;
        const lowByte = unit & 0xFF;
        bytes.push(highByte);
        bytes.push(lowByte);
        position++;
    }
    return bytes;
}

this is the correct way to extract the bytes a JavaScript string is made of

String.prototype.charCodeAt() returns a 16-bit unsigned integer; it must be split into two bytes because its value can exceed 0xff.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment