Translating a DOCX file into machine-readable content
One of my latest tasks is to build clear requirements from the customer's WinWord documents; I also have a strong feeling that the documentation will change over time, and that tracing those changes will be dirty work.
As a consequence, I wrote a small utility that renders the contents (not the rendition) of a given DOCX file in the browser.
The transformation (still at an early stage) is below.
RenderWindWord.dsp
<?<http.tmcz.index>?>
<hr/>
<link rel="shortcut icon" href="/favicon.ico">
<script type='text/javascript' src='/js/runtime.js'></script>
<link rel="stylesheet" type="text/css" href="/index.css" />
<select onchange='window.location.href="<?=this.classUrl?>/"+this.value+"/"'>
<option></option>
<?var f=new java.io.File('j:\\amdocs\\TMCZ\\1204091520\\Docs\\').list();
var fileNames=[];
for(var i=0;i<f.length;i++)
if((''+f[i]).indexOf('.docx')>=0)
fileNames.push(''+f[i]);
fileNames.sort(function(a,b){return a<b;});
for(var i=0;i<fileNames.length;i++){?>
<option value='<?&fileNames[i]?>' <?=(fileNames[i]==$_ARGS[0])?'selected':''?>>
<?&fileNames[i].split(".docx").join("")?>
</option>
<?}?>
</select>
<hr/>
<?
if(!$_ARGS[0])
return;
var zipFile=new java.util.zip.ZipFile('j:\\amdocs\\TMCZ\\1204091520\\Docs\\'+$_ARGS[0]);
var docXml=zipFile.getInputStream(zipFile.getEntry('word/document.xml'));
var xslis=new java.io.FileInputStream("j:\\http\\tmcz\\docx2struct.xsl");
var bytes=new dms.xml.XSLT(xslis).transform(docXml);
xslis.close();
docXml.close();
var result=new java.lang.String(bytes);
?>
<?=result?>
Transform.xsl
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" mc:ignorable="w14 wp14" exclude-result-prefixes="wpc mc o r m v wp14 wp w10 w w14 wpg wpi wne wps a">
<!--
Turns word/document.xml of a DOCX package into a plain HTML fragment:
paragraphs become p elements whose class is the Word paragraph style id,
tables become table/tr/td, section properties become a horizontal rule,
drawings and pictures are dropped.
exclude-result-prefixes keeps the Office namespace declarations out of the
generated markup.
-->
<xsl:output method="html" version="4.0" encoding="utf-8" indent="no" omit-xml-declaration="no"/>

<!-- Document root: emit the style sheet for the known paragraph styles, then the content. -->
<xsl:template match="/">
<style>
*{font-family: "Arial Unicode MS"}
.Heading1{
border:solid #333333 1px;
border-left:solid #bbbbbb 1em;
background:#bbbbbb;
font-weight:bold;
font-size:1.1em
}
.Heading2{
border:solid #333333 1px;
border-left:solid #bbbbbb 2em;
background:#dddddd;
font-weight:bold;
font-size:1.1em
}
.Heading3{
border:solid #333333 1px;
border-left:solid #bbbbbb 3em;
background:#eeeeee;
}
.Heading4{
border:solid #333333 1px;
border-left:solid #bbbbbb 4em;
background:#eeeeee;
}
.Bullet1square, .Bullet1round{
border-left:solid #bbbbbb 2em;
}
.Bullet2square, .Bullet2round{
border-left:solid #bbbbbb 4em;
}
</style>
<xsl:apply-templates/>
</xsl:template>

<!-- Text run content: the only place where document text reaches the output. -->
<xsl:template match="w:t"><xsl:value-of select="."/></xsl:template>

<!-- Paragraph: the Word style id (w:pPr/w:pStyle/@w:val) is used as the CSS class; it is empty when the paragraph has no explicit style. -->
<xsl:template match="w:p">
<xsl:element name="p">
<xsl:attribute name="class">
<xsl:value-of select="./w:pPr/w:pStyle/@w:val"/>
</xsl:attribute>
<xsl:apply-templates/>
</xsl:element>
</xsl:template>

<!-- Table, row and cell: the converted content is placed inside the matching HTML element. -->
<xsl:template match="w:tbl"><table border="1"><xsl:apply-templates/></table></xsl:template>
<xsl:template match="w:tr"><tr><xsl:apply-templates/></tr></xsl:template>
<xsl:template match="w:tc"><td><xsl:apply-templates/></td></xsl:template>

<!-- Section properties are shown as a horizontal rule; the element must be closed for the stylesheet to be well-formed XML. -->
<xsl:template match="w:sectPr"><hr/></xsl:template>

<!-- A Word tab is kept as an explicit marker element. -->
<xsl:template match="w:tab"><tab></tab></xsl:template>

<!-- Graphics are not part of the extracted content. -->
<xsl:template match="w:drawing"></xsl:template>
<xsl:template match="w:pict"></xsl:template>

<!-- Everything else: output nothing for the node itself and keep descending, so only the templates above produce markup. -->
<xsl:template match="node() | @*"><xsl:apply-templates select="node()"/></xsl:template>
</xsl:stylesheet>