Created
April 3, 2023 17:55
-
-
Save Matheswaaran/c12ab47634698fdb316049d5706f86c6 to your computer and use it in GitHub Desktop.
Reading .docx files in React.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import React, { useState } from "react";
import PizZip from "pizzip";
import { DOMParser } from "@xmldom/xmldom";
/**
 * Parse an XML string into a DOM document.
 *
 * A leading byte-order mark (U+FEFF, char code 65279) is dropped before the
 * text is handed to the parser.
 *
 * @param {string} str - XML source text.
 * @returns {Document} Whatever `DOMParser#parseFromString` returns for the
 *   "text/xml" MIME type.
 */
function str2xml(str) {
  const BOM_CHAR_CODE = 0xfeff;
  // `slice` replaces the deprecated `substr`; the parameter is left untouched.
  const xmlText = str.charCodeAt(0) === BOM_CHAR_CODE ? str.slice(1) : str;
  return new DOMParser().parseFromString(xmlText, "text/xml");
}
/**
 * Get the paragraphs of a .docx file as a JavaScript array of strings.
 *
 * Unzips `content`, parses the `word/document.xml` entry, and for every `w:p`
 * element concatenates the text found in its descendant `w:t` elements.
 * Paragraphs that yield no text are omitted from the result.
 *
 * @param {*} content - Zipped .docx data, passed unchanged to `new PizZip(content)`.
 * @returns {string[]} Non-empty paragraph texts, in document order.
 * @throws {Error} If the archive has no `word/document.xml` entry.
 */
function getParagraphs(content) {
  const zip = new PizZip(content);
  const documentFile = zip.files["word/document.xml"];
  if (!documentFile) {
    throw new Error('Not a valid .docx file: "word/document.xml" is missing.');
  }

  const xml = str2xml(documentFile.asText());
  const paragraphsXml = xml.getElementsByTagName("w:p");
  const paragraphs = [];

  for (let i = 0; i < paragraphsXml.length; i++) {
    const textsXml = paragraphsXml[i].getElementsByTagName("w:t");
    let fullText = "";

    for (let j = 0; j < textsXml.length; j++) {
      // An empty <w:t/> has no child nodes, so never index childNodes[0]
      // unconditionally; walk whatever children exist instead.
      const children = textsXml[j].childNodes;
      const childCount = children ? children.length : 0;
      for (let k = 0; k < childCount; k++) {
        const value = children[k].nodeValue;
        if (value != null) {
          fullText += value;
        }
      }
    }

    if (fullText) {
      paragraphs.push(fullText);
    }
  }

  return paragraphs;
}
const DocxReader = () => { | |
const [paragraphs, setParagraphs] = useState([]); | |
const onFileUpload = (event) => { | |
const reader = new FileReader(); | |
let file = event.target.files[0]; | |
reader.onload = (e) => { | |
const content = e.target.result; | |
const paragraphs = getParagraphs(content); | |
setParagraphs(paragraphs); | |
}; | |
reader.onerror = (err) => console.error(err); | |
reader.readAsBinaryString(file); | |
}; | |
return <input type="file" onChange={onFileUpload} name="docx-reader" />; | |
}; | |
export default DocxReader; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment