/**
 * Extract the text of a .docx file.
 *
 * Delegates to readZippedXML(), asking it for the "word/document.xml"
 * entry of the archive at $filename.
 *
 * @param string $filename Path to the .docx (ZIP) file.
 * @return string Whatever readZippedXML() returns for that entry.
 */
function docx2text($filename) {
    // Archive entry that this function treats as the document body.
    $documentEntry = "word/document.xml";

    return readZippedXML($filename, $documentEntry);
}
/**
 * Read one XML entry from a ZIP archive and reduce it to plain text.
 *
 * Every closing "</w:p>" tag is turned into a CRLF line break, all remaining
 * markup is stripped, and XML character escapes (&amp;, &lt;, &#13;, ...) are
 * decoded so the caller receives real text rather than escaped XML.
 *
 * @param string $archiveFile Path to the ZIP-based file (e.g. a .docx).
 * @param string $dataFile    Name of the entry inside the archive.
 * @return string Extracted text, or "" when the archive cannot be opened,
 *                the entry is missing or empty, or the entry is not
 *                well-formed XML.
 */
function readZippedXML($archiveFile, $dataFile) {
    $zip = new ZipArchive;
    if (true !== $zip->open($archiveFile)) {
        return "";
    }

    $index = $zip->locateName($dataFile);
    $data = ($index !== false) ? $zip->getFromIndex($index) : false;
    $zip->close();

    // A missing/unreadable entry yields false; an empty one yields "".
    // DOMDocument::loadXML() rejects an empty source, so stop here.
    if ($data === false || $data === "") {
        return "";
    }

    // Deliberately NOT passing LIBXML_NOENT / LIBXML_XINCLUDE: entity
    // substitution on an untrusted archive allows XXE (external entities
    // pulling in local files). LIBXML_NONET additionally forbids network
    // access while parsing.
    $xml = new DOMDocument();
    $options = LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING;
    if (!$xml->loadXML($data, $options)) {
        return "";
    }

    // Serialize only the root element so the XML declaration does not leave
    // a stray leading newline once the tags are stripped.
    $markup = $xml->saveXML($xml->documentElement);
    if ($markup === false) {
        return "";
    }

    $markup = str_replace("</w:p>", "\r\n", $markup);

    // Strip tags first, then decode: the serialized text is still escaped at
    // this point, so a literal "<" in the document cannot be mistaken for markup.
    return html_entity_decode(strip_tags($markup), ENT_QUOTES | ENT_XML1, 'UTF-8');
}

代码参考修改自:
http://webcheatsheet.com/PHP/reading_the_clean_text_from_docx_odt.php
http://stackoverflow.com/questions/22632086/error-in-domdocumentloadxml

Related Posts: PHP-读取docx文件纯文本 :