Reading a PDF document no longer has to be a concern: you can extract its text from a Selenium (Java) test by using the Apache PDFBox library. This post shows you how.
Download the pdfbox-1.8.8 JAR file and add it to your project's build path.
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
/**
 * Extracts the text content of a PDF file with Apache PDFBox 1.8.x and prints
 * it to standard output.
 *
 * <p>Usage: {@code java SimpleTest <path-to-pdf>}
 */
public class SimpleTest {

    /**
     * Parses the PDF named by the first command-line argument and prints its text.
     *
     * @param args command-line arguments; {@code args[0]} is the path of the PDF to read
     * @throws IOException if the file cannot be opened, parsed, or closed
     */
    public static void main(String args[]) throws IOException {
        // java.io.File has no no-argument constructor, so the path must be supplied.
        if (args.length < 1) {
            System.err.println("Usage: java SimpleTest <path-to-pdf>");
            System.exit(1);
        }
        File file = new File(args[0]);

        // try-with-resources closes the input stream even when parsing fails.
        try (FileInputStream input = new FileInputStream(file)) {
            PDFParser parser = new PDFParser(input);
            parser.parse();
            COSDocument cosDoc = parser.getDocument();

            PDDocument pdDoc = new PDDocument(cosDoc);
            try {
                PDFTextStripper pdfStripper = new PDFTextStripper();
                String parsedText = pdfStripper.getText(pdDoc);
                System.out.println(parsedText);
            } finally {
                // Release the document (and its underlying COSDocument) once done.
                pdDoc.close();
            }
        }
    }
}
Download the pdfbox-1.8.8 JAR file and add it to your project's build path.
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
/**
 * Extracts the text content of a PDF file with Apache PDFBox 1.8.x and prints
 * it to standard output.
 *
 * <p>Usage: {@code java SimpleTest <path-to-pdf>}
 */
public class SimpleTest {

    /**
     * Parses the PDF named by the first command-line argument and prints its text.
     *
     * @param args command-line arguments; {@code args[0]} is the path of the PDF to read
     * @throws IOException if the file cannot be opened, parsed, or closed
     */
    public static void main(String args[]) throws IOException {
        // java.io.File has no no-argument constructor, so the path must be supplied.
        if (args.length < 1) {
            System.err.println("Usage: java SimpleTest <path-to-pdf>");
            System.exit(1);
        }
        File file = new File(args[0]);

        // try-with-resources closes the input stream even when parsing fails.
        try (FileInputStream input = new FileInputStream(file)) {
            PDFParser parser = new PDFParser(input);
            parser.parse();
            COSDocument cosDoc = parser.getDocument();

            PDDocument pdDoc = new PDDocument(cosDoc);
            try {
                PDFTextStripper pdfStripper = new PDFTextStripper();
                String parsedText = pdfStripper.getText(pdDoc);
                System.out.println(parsedText);
            } finally {
                // Release the document (and its underlying COSDocument) once done.
                pdDoc.close();
            }
        }
    }
}
Post a Comment
Post a Comment