Skip to content

Instantly share code, notes, and snippets.

@hashaam
Last active May 13, 2022 06:18
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hashaam/31f51d4044a03473c18a168f4999f063 to your computer and use it in GitHub Desktop.
Save hashaam/31f51d4044a03473c18a168f4999f063 to your computer and use it in GitHub Desktop.
Strip HTML tags in Swift
// https://hashaam.com/2017/06/11/strip-html-tags-in-swift/
// Strips HTML markup from `htmlString` by letting NSAttributedString's HTML
// importer parse the text and then reading back its plain `string`.
let htmlString = "LCD Soundsystem was the musical project of producer <a href='http://www.last.fm/music/James+Murphy' class='bbcode_artist'>James Murphy</a>, co-founder of <a href='http://www.last.fm/tag/dance-punk' class='bbcode_tag' rel='tag'>dance-punk</a> label <a href='http://www.last.fm/label/DFA' class='bbcode_label'>DFA</a> Records. Formed in 2001 in New York City, New York, United States, the music of LCD Soundsystem can also be described as a mix of <a href='http://www.last.fm/tag/alternative%20dance' class='bbcode_tag' rel='tag'>alternative dance</a> and <a href='http://www.last.fm/tag/post%20punk' class='bbcode_tag' rel='tag'>post punk</a>, along with elements of <a href='http://www.last.fm/tag/disco' class='bbcode_tag' rel='tag'>disco</a> and other styles. <br />"
// Encoding a Swift String as UTF-8 cannot fail, so build the Data directly
// instead of force-unwrapping `data(using:)`.
let htmlStringData = Data(htmlString.utf8)
// Swift 4+ spelling of the reading options; the old NSDocumentTypeDocumentAttribute
// style constants and the `[String: Any]` dictionary type no longer compile.
let options: [NSAttributedString.DocumentReadingOptionKey: Any] = [
    .documentType: NSAttributedString.DocumentType.html,
    .characterEncoding: String.Encoding.utf8.rawValue
]
// Apple's documentation says the HTML importer should not be called from a
// background thread, so the conversion is scheduled on the main queue rather
// than on a global queue.
DispatchQueue.main.async {
    do {
        let attributedString = try NSAttributedString(data: htmlStringData, options: options, documentAttributes: nil)
        // handle text in main thread
        let stringWithoutHTMLTags = attributedString.string
    } catch {
        // Report the failure instead of crashing the way `try!` would.
        print("Failed to strip HTML tags: \(error)")
    }
}
@nthrussell
Copy link

Could you please update the code for Swift 5? I have tried, but it's not working.

@gpaulbr
Copy link

gpaulbr commented May 16, 2019

Could you please update the code for Swift 5? I have tried, but it's not working.

Replace options with:
// Reading options for NSAttributedString's data initializer, keyed by the
// typed DocumentReadingOptionKey values.
let options: [NSAttributedString.DocumentReadingOptionKey: Any] = [
    // The data is UTF-8 encoded text.
    .characterEncoding: String.Encoding.utf8.rawValue,
    // The data is an HTML document.
    .documentType: NSAttributedString.DocumentType.html,
]

@msiddiqurrehman
Copy link

msiddiqurrehman commented Jun 21, 2019

Could you please update the code for Swift 5? I have tried, but it's not working.

Replace options with:
// Reading options for NSAttributedString's data initializer, keyed by the
// typed DocumentReadingOptionKey values.
let options: [NSAttributedString.DocumentReadingOptionKey: Any] = [
    // The data is UTF-8 encoded text.
    .characterEncoding: String.Encoding.utf8.rawValue,
    // The data is an HTML document.
    .documentType: NSAttributedString.DocumentType.html,
]

Hi,
Thanks for the help. Is there a specific reason for doing this with `DispatchQueue`?
I tried to make the code reusable by putting it inside a class, but then it didn't work.
Here is the code:

import UIKit

/// Helper that strips HTML tags from a string using NSAttributedString's HTML importer.
///
/// This variant runs the conversion asynchronously, so the value returned by
/// `removeHTMLTagsFromString(_:)` is not the result of that same call (see the method notes).
class StringUtilities{
    
    /// Most recently stored plain-text result; starts out as this placeholder text.
    var stringWithoutHTMLTags = "stringWithoutHTMLTags"
    
    /// Starts converting `HTMLString` to plain text and returns the current value of
    /// `stringWithoutHTMLTags`.
    ///
    /// - Parameter HTMLString: The HTML text to convert.
    /// - Returns: Whatever `stringWithoutHTMLTags` holds at the moment of the `return`,
    ///   not necessarily the stripped form of `HTMLString`.
    func removeHTMLTagsFromString(_ HTMLString: String) -> String {
        
        // Force-unwrapped: a nil result here would crash.
        let htmlStringData = HTMLString.data(using: String.Encoding.utf8)!
        
        let options: [NSAttributedString.DocumentReadingOptionKey : Any] = [
            .documentType: NSAttributedString.DocumentType.html,
            .characterEncoding: String.Encoding.utf8.rawValue
        ]
        
        // `async` only enqueues this closure; execution continues straight to the
        // `return` below without waiting for the conversion.
        DispatchQueue.global(qos: .userInitiated).async {
            
            // perform in background thread
            // `try!` crashes if the initializer throws.
            // NOTE(review): Apple's documentation warns against calling the HTML importer
            // from a background thread — confirm before reusing this pattern.
            let attributedString = try! NSAttributedString(data: htmlStringData, options: options, documentAttributes: nil)
            
            DispatchQueue.main.async {
                // handle text in main thread
                // This assignment is queued on the main queue; when the method itself was
                // called from the main thread it cannot run until after the method returned.
                self.stringWithoutHTMLTags = attributedString.string
            }
        }
        // Returns the property as it is right now, which is why the first call yields
        // the placeholder text rather than the stripped string.
        return self.stringWithoutHTMLTags
    }
}

// Sample input containing <a> and <br /> markup.
let htmlString = "LCD Soundsystem was the musical project of producer <a href='http://www.last.fm/music/James+Murphy' class='bbcode_artist'>James Murphy</a>, co-founder of <a href='http://www.last.fm/tag/dance-punk' class='bbcode_tag' rel='tag'>dance-punk</a> label <a href='http://www.last.fm/label/DFA' class='bbcode_label'>DFA</a> Records. Formed in 2001 in New York City, New York, United States, the music of LCD Soundsystem can also be described as a mix of <a href='http://www.last.fm/tag/alternative%20dance' class='bbcode_tag' rel='tag'>alternative dance</a> and <a href='http://www.last.fm/tag/post%20punk' class='bbcode_tag' rel='tag'>post punk</a>, along with elements of <a href='http://www.last.fm/tag/disco' class='bbcode_tag' rel='tag'>disco</a> and other styles. <br />"

let strObj = StringUtilities()

// The result is never reassigned, so it is declared as a constant.
let newStr = strObj.removeHTMLTagsFromString(htmlString)
// Prints whatever the method returned synchronously.
print("Output : \(newStr)")

The expected output was the complete string without HTML tags, but it printed the initial value assigned to the class property instead.

However, when I tried the code below, it worked:

import UIKit

/// Helper that strips HTML tags from a string using NSAttributedString's HTML importer.
class StringUtilities {

    /// Plain-text result of the most recent `removeHTMLTagsFromString(_:)` call.
    /// Holds this placeholder text until the first call.
    var stringWithoutHTMLTags = "stringWithoutHTMLTags"

    /// Converts `HTMLString` to plain text synchronously and returns the result.
    ///
    /// The result is also stored in `stringWithoutHTMLTags`.
    /// Apple's documentation says the HTML importer should not be called from a
    /// background thread, so call this method on the main thread.
    ///
    /// - Parameter HTMLString: The HTML text to convert.
    /// - Returns: The text with markup removed. If the importer throws, the input is
    ///   returned unchanged instead of crashing.
    func removeHTMLTagsFromString(_ HTMLString: String) -> String {
        // Encoding a Swift String as UTF-8 cannot fail, so no force-unwrap is needed.
        let htmlStringData = Data(HTMLString.utf8)

        let options: [NSAttributedString.DocumentReadingOptionKey: Any] = [
            .documentType: NSAttributedString.DocumentType.html,
            .characterEncoding: String.Encoding.utf8.rawValue
        ]

        do {
            let attributedString = try NSAttributedString(data: htmlStringData, options: options, documentAttributes: nil)
            stringWithoutHTMLTags = attributedString.string
        } catch {
            // Best-effort fallback: keep the caller running with the original text.
            stringWithoutHTMLTags = HTMLString
        }

        return stringWithoutHTMLTags
    }
}

// Sample input containing <a> and <br /> markup.
let htmlString = "LCD Soundsystem was the musical project of producer <a href='http://www.last.fm/music/James+Murphy' class='bbcode_artist'>James Murphy</a>, co-founder of <a href='http://www.last.fm/tag/dance-punk' class='bbcode_tag' rel='tag'>dance-punk</a> label <a href='http://www.last.fm/label/DFA' class='bbcode_label'>DFA</a> Records. Formed in 2001 in New York City, New York, United States, the music of LCD Soundsystem can also be described as a mix of <a href='http://www.last.fm/tag/alternative%20dance' class='bbcode_tag' rel='tag'>alternative dance</a> and <a href='http://www.last.fm/tag/post%20punk' class='bbcode_tag' rel='tag'>post punk</a>, along with elements of <a href='http://www.last.fm/tag/disco' class='bbcode_tag' rel='tag'>disco</a> and other styles. <br />"

let strObj = StringUtilities()

// The result is never reassigned, so it is declared as a constant.
let newStr = strObj.removeHTMLTagsFromString(htmlString)
print("Output : \(newStr)")

@piemonte
Copy link

extension String {

    /// Returns this string with HTML markup removed, or `nil` if it cannot be converted.
    ///
    /// The text is encoded as UTF-8 and parsed by NSAttributedString's HTML importer;
    /// the importer's plain `string` is the result.
    ///
    /// - Returns: The plain text, or `nil` when encoding fails or the importer throws.
    public func trimHTMLTags() -> String? {
        let readingOptions: [NSAttributedString.DocumentReadingOptionKey: Any] = [
            .characterEncoding: String.Encoding.utf8.rawValue,
            .documentType: NSAttributedString.DocumentType.html
        ]

        // Any failure along the way (encoding or parsing) collapses to nil.
        return data(using: .utf8).flatMap { encoded in
            let parsed = try? NSAttributedString(data: encoded, options: readingOptions, documentAttributes: nil)
            return parsed?.string
        }
    }

}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment