Skip to content

Instantly share code, notes, and snippets.

@blancoj
Created August 25, 2015 19:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save blancoj/53a4cd1052b9b7a32065 to your computer and use it in GitHub Desktop.
Save blancoj/53a4cd1052b9b7a32065 to your computer and use it in GitHub Desktop.
require 'spec_helper'
# Specs for Hydra::Works::FullTextExtractionService.
#
# Each group creates a generic file, attaches a fixture as :original_file,
# runs the extraction service on it, and then inspects the resulting object.
describe Hydra::Works::FullTextExtractionService do
  describe "extract pdf file" do
    before do
      @myfile = Hydra::Works::GenericFile::Base.create
      # Keep a reference to the handle so the after hook can close it.
      @fixture = File.open(File.join(fixture_path, 'test4.pdf'))
      Hydra::Works::AddFileToGenericFile.call(@myfile, @fixture, :original_file)
      Hydra::Works::FullTextExtractionService.run(@myfile)
    end

    # Release the fixture handle once the example is done; guarded in case the
    # before hook failed before opening it or a callee already closed it.
    after do
      @fixture.close if @fixture && !@fixture.closed?
    end

    it "returns expected results after a save" do
      # NOTE(review): this spec does not show which step populates file_size,
      # original_checksum, title and format_label -- confirm they are expected
      # to be set by the calls in the before hook.
      expect(@myfile.file_size).to eq ['218882']
      expect(@myfile.original_checksum).to eq ['5a2d761cab7c15b2b3bb3465ce64586d']
      expect(@myfile.title).to include("Microsoft Word - sample.pdf.docx")
      expect(@myfile.filename).to eq 'test4.pdf'
      expect(@myfile.format_label).to eq ["Portable Document Format"]
      # The extracted text is compared verbatim, including leading whitespace.
      expect(@myfile.extracted_text.content).to eq("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nMicrosoft Word - sample.pdf.docx\n\n\n \n \n\n \n\n \n\n \n\nThis PDF file was created using CutePDF. \n\nwww.cutepdf.com")
    end
  end

  describe "m4a" do
    before do
      @myfile = Hydra::Works::GenericFile::Base.create
      # Keep a reference to the handle so the after hook can close it.
      @fixture = File.open(File.join(fixture_path, 'spoken-text.m4a'))
      Hydra::Works::AddFileToGenericFile.call(@myfile, @fixture, :original_file)
      Hydra::Works::FullTextExtractionService.run(@myfile)
    end

    # Release the fixture handle once the example is done.
    after do
      @fixture.close if @fixture && !@fixture.closed?
    end

    it "returns expected content for full text" do
      expect(@myfile.extracted_text.content).to eq("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nLavf56.15.102")
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment