3 * PHP EPub Meta library
5 * @author Andreas Gohr <andi@splitbrain.org>
6 * @author Sébastien Lucas <sebastien@slucas.fr>
9 require_once(realpath( dirname( __FILE__ ) ) . '/tbszip.php');
11 define ("METADATA_FILE", "META-INF/container.xml");
14 public $xml; //FIXME change to protected, later
21 protected $coverpath='';
22 protected $namespaces;
23 protected $imagetoadd='';
/**
 * Open an epub file and load its package metadata.
 *
 * Reads the container file (METADATA_FILE) to find the package document,
 * then parses that document into $this->xml and prepares $this->xpath.
 *
 * @param string $file path to epub file to work on
 * @throws Exception if metadata could not be loaded
 */
public function __construct($file){
    // open file
    $this->file = $file;
    $this->zip = new clsTbsZip();
    if(!$this->zip->Open($this->file)){
        throw new Exception('Failed to read epub file');
    }

    // read container data
    if (!$this->zip->FileExists(METADATA_FILE)) {
        // report the file that is actually looked up
        throw new Exception ("Unable to find " . METADATA_FILE);
    }

    $data = $this->zip->FileRead(METADATA_FILE);
    if(!$data){
        throw new Exception('Failed to access epub container data');
    }
    $xml = new DOMDocument();
    $xml->registerNodeClass('DOMElement','EPubDOMElement');
    if(!$xml->loadXML($data)){
        throw new Exception('Failed to access epub container data');
    }
    $xpath = new EPubDOMXPath($xml);
    $nodes = $xpath->query('//n:rootfiles/n:rootfile[@media-type="application/oebps-package+xml"]');
    // a container without a package rootfile would otherwise end in a call on null
    if(!$nodes || $nodes->length == 0){
        throw new Exception('Unable to find the package document in ' . METADATA_FILE);
    }
    $this->meta = $nodes->item(0)->attr('full-path');

    // load metadata
    if (!$this->zip->FileExists($this->meta)) {
        throw new Exception ("Unable to find " . $this->meta);
    }

    $data = $this->zip->FileRead($this->meta);
    if(!$data){
        throw new Exception('Failed to access epub metadata');
    }
    $this->xml = new DOMDocument();
    $this->xml->registerNodeClass('DOMElement','EPubDOMElement');
    if(!$this->xml->loadXML($data)){
        throw new Exception('Failed to access epub metadata');
    }
    $this->xml->formatOutput = true;
    $this->xpath = new EPubDOMXPath($this->xml);
}
/**
 * Load the table of contents document referenced by the spine.
 *
 * Looks up the manifest item named by the spine's "toc" attribute, parses
 * it into $this->toc and prepares $this->toc_xpath with the document's
 * default namespace registered under the prefix "x".
 *
 * @throws Exception if the spine, the toc manifest item or the toc file is missing
 */
public function initSpineComponent ()
{
    $spine = $this->xpath->query('//opf:spine')->item(0);
    if (is_null($spine)) {
        throw new Exception ("Unable to find the spine in " . $this->meta);
    }
    $tocid = $spine->getAttribute('toc');
    $tocitem = $this->xpath->query("//opf:manifest/opf:item[@id='$tocid']")->item(0);
    if (is_null($tocitem)) {
        throw new Exception ("Unable to find the toc item in " . $this->meta);
    }
    $tochref = $tocitem->attr('href');
    // dirname() yields '.' for a package document stored in the archive
    // root; prefixing './' would not give the plain entry name that Cover()
    // builds for the same situation
    $basedir = dirname($this->meta);
    $tocpath = ($basedir == '.') ? $tochref : $basedir.'/'.$tochref;

    if (!$this->zip->FileExists($tocpath)) {
        throw new Exception ("Unable to find " . $tocpath);
    }

    $data = $this->zip->FileRead($tocpath);
    if (!$data) {
        throw new Exception ("Unable to read " . $tocpath);
    }
    $this->toc = new DOMDocument();
    $this->toc->registerNodeClass('DOMElement','EPubDOMElement');
    $this->toc->loadXML($data);
    $this->toc_xpath = new EPubDOMXPath($this->toc);
    $rootNamespace = $this->toc->lookupNamespaceUri($this->toc->namespaceURI);
    $this->toc_xpath->registerNamespace('x', $rootNamespace);
}
94 public function file(){
/**
 * Cancel the pending modification of the package document and close the
 * zip handle.
 */
public function close (){
    $archive = $this->zip;
    $archive->FileCancelModif($this->meta);
    // TODO : Add cancelation of cover image
    $archive->Close ();
}
/**
 * Remove iTunes files
 *
 * Each known iTunes entry that exists in the archive is replaced with
 * false through FileReplace().
 */
public function cleanITunesCrap () {
    foreach (array("iTunesMetadata.plist", "iTunesArtwork") as $entry) {
        if (!$this->zip->FileExists($entry)) {
            continue;
        }
        $this->zip->FileReplace ($entry, false);
    }
}
120 * Writes back all meta data changes
122 public function save(){
/**
 * Get the updated epub
 *
 * Stages the current metadata XML (and a pending cover image, if any) in
 * the archive. When $file is given the archive is flushed with
 * TBSZIP_DOWNLOAD under that name.
 *
 * @param string|bool $file name passed to Flush(), or false to only stage the changes
 * @throws Exception if the pending cover image cannot be read
 */
public function download($file=false){
    // read the image before staging anything: FileReplace() with false is
    // what cleanITunesCrap() uses to remove entries, so a failed read must
    // never be handed over as data
    $image = false;
    if($this->imagetoadd){
        $image = file_get_contents($this->imagetoadd);
        if($image === false){
            throw new Exception('Failed to read cover image ' . $this->imagetoadd);
        }
    }

    $this->zip->FileReplace($this->meta,$this->xml->saveXML());
    // add the cover image
    if($this->imagetoadd){
        $this->zip->FileReplace($this->coverpath,$image);
        $this->imagetoadd='';
    }
    if ($file) $this->zip->Flush(TBSZIP_DOWNLOAD, $file);
}
/**
 * Get the components list as an array
 *
 * Walks the spine in document order and resolves every itemref to the
 * href of the manifest item with the same id. An itemref whose idref has
 * no manifest item is skipped.
 *
 * @return array list of href values
 */
public function components(){
    $spine = array();
    $nodes = $this->xpath->query('//opf:spine/opf:itemref');
    foreach($nodes as $node){
        $idref = $node->getAttribute('idref');
        $item = $this->xpath->query("//opf:manifest/opf:item[@id='$idref']")->item(0);
        if(is_null($item)){
            // dangling reference: nothing to list for it
            continue;
        }
        $spine[] = $item->getAttribute('href');
    }
    return $spine;
}
/**
 * Get the component content
 *
 * Reads the component from the archive and rewrites simple src/href
 * attribute values so that they point to epubfs.php?comp=<value>.
 *
 * @param string $comp component path relative to the package document
 * @return string the rewritten component content
 * @throws Exception if the component is missing or cannot be read
 */
public function component($comp) {
    // dirname() yields '.' for a package document stored in the archive
    // root; prefixing './' would not give the plain entry name that Cover()
    // builds for the same situation
    $basedir = dirname($this->meta);
    $path = ($basedir == '.') ? $comp : $basedir.'/'.$comp;
    if (!$this->zip->FileExists($path)) {
        throw new Exception ("Unable to find " . $path);
    }

    $data = $this->zip->FileRead($path);
    if ($data === false) {
        throw new Exception ("Unable to read " . $path);
    }
    $data = preg_replace ("/src=[\"']([\w\/\.]*?)[\"']/", "src='epubfs.php?comp=$1'", $data);
    $data = preg_replace ("/href=[\"']([\w\/\.]*?)[\"']/", "href='epubfs.php?comp=$1'", $data);
    return $data;
}
/**
 * Get the component content type
 *
 * @param string $comp href of the component as listed in the manifest
 * @return string value of the manifest item's media-type attribute
 * @throws Exception if no manifest item has this href
 */
public function componentContentType($comp) {
    $nodes = $this->xpath->query("//opf:manifest/opf:item[@href='$comp']");
    // query() gives false for an expression broken by a quote in $comp and
    // an empty list for an unknown href; both used to end in a call on a
    // non-object
    if (!$nodes || $nodes->length == 0) {
        throw new Exception ("Unable to find " . $comp);
    }
    return $nodes->item(0)->getAttribute('media-type');
}
/**
 * Get the Epub content (TOC) as an array
 *
 * For each chapter there is a "title" and a "src"
 *
 * Only navPoint elements directly below navMap are listed. Uses
 * $this->toc_xpath, which is assigned by initSpineComponent().
 *
 * @return array list of array("title" => ..., "src" => ...)
 */
public function contents(){
    $contents = array();
    $navPoints = $this->toc_xpath->query('//x:ncx/x:navMap/x:navPoint');
    foreach($navPoints as $navPoint){
        $label = $this->toc_xpath->query('x:navLabel/x:text', $navPoint)->item(0);
        $title = $label->nodeValue;
        $target = $this->toc_xpath->query('x:content', $navPoint)->item(0);
        $contents[] = array("title" => $title, "src" => $target->attr('src'));
    }
    return $contents;
}
/**
 * Get or set the book author(s)
 *
 * Authors should be given with a "file-as" and a real name. The file-as
 * is used for sorting in e-readers.
 *
 * Example:
 *
 * array(
 *      'Pratchett, Terry'    => 'Terry Pratchett',
 *      'Simpson, Jacqueline' => 'Jacqueline Simpson',
 * )
 *
 * A comma separated string or a plain list of names is accepted as well;
 * the name then doubles as its own file-as value.
 *
 * @param array|string|bool $authors new authors, or false to only read
 * @return array current authors as file-as => name
 */
public function Authors($authors=false){
    // set new data
    if($authors !== false){
        // authors were given as a comma separated list
        if(is_string($authors)){
            $authors = ($authors == '') ? array() : array_map('trim', explode(',',$authors));
        }

        // delete existing nodes
        foreach($this->xpath->query('//opf:metadata/dc:creator[@opf:role="aut"]') as $old){
            $old->delete();
        }

        // add new nodes
        $metadata = $this->xpath->query('//opf:metadata')->item(0);
        foreach($authors as $fileas => $name){
            if(is_int($fileas)){
                // numeric array given
                $fileas = $name;
            }
            $creator = $metadata->newChild('dc:creator',$name);
            $creator->attr('opf:role', 'aut');
            $creator->attr('opf:file-as', $fileas);
        }

        $this->reparse();
    }

    // read current data
    $creators = $this->xpath->query('//opf:metadata/dc:creator[@opf:role="aut"]');
    // no nodes were found: try again without role and tag what is found
    $rolefix = ($creators->length == 0);
    if($rolefix){
        $creators = $this->xpath->query('//opf:metadata/dc:creator');
    }

    $result = array();
    foreach($creators as $creator){
        $name = $creator->nodeValue;
        $fileas = $creator->attr('opf:file-as');
        if(!$fileas){
            $fileas = $name;
            $creator->attr('opf:file-as',$fileas);
        }
        if($rolefix){
            $creator->attr('opf:role','aut');
        }
        $result[$fileas] = $name;
    }
    return $result;
}
/**
 * Set or get the book title
 *
 * @param string $title new title, or false to only read the current one
 */
public function Title($title=false){
    $element = 'dc:title';
    return $this->getset($element,$title);
}
/**
 * Set or get the book's language
 *
 * @param string $lang new language, or false to only read the current one
 */
public function Language($lang=false){
    $element = 'dc:language';
    return $this->getset($element,$lang);
}
/**
 * Set or get the book's publisher info
 *
 * @param string $publisher new publisher, or false to only read the current one
 */
public function Publisher($publisher=false){
    $element = 'dc:publisher';
    return $this->getset($element,$publisher);
}
/**
 * Set or get the book's copyright info
 *
 * @param string $rights new rights statement, or false to only read the current one
 */
public function Copyright($rights=false){
    $element = 'dc:rights';
    return $this->getset($element,$rights);
}
/**
 * Set or get the book's description
 *
 * @param string $description new description, or false to only read the current one
 */
public function Description($description=false){
    $element = 'dc:description';
    return $this->getset($element,$description);
}
/**
 * Set or get the book's ISBN number
 *
 * Stored as the dc:identifier whose opf:scheme attribute is "ISBN".
 *
 * @param string $isbn new ISBN, or false to only read the current one
 */
public function ISBN($isbn=false){
    $scheme = 'ISBN';
    return $this->getset('dc:identifier',$isbn,'opf:scheme',$scheme);
}
/**
 * Set or get the Google Books ID
 *
 * Stored as the dc:identifier whose opf:scheme attribute is "GOOGLE".
 *
 * @param string $google new ID, or false to only read the current one
 */
public function Google($google=false){
    $scheme = 'GOOGLE';
    return $this->getset('dc:identifier',$google,'opf:scheme',$scheme);
}
/**
 * Set or get the Amazon ID of the book
 *
 * Stored as the dc:identifier whose opf:scheme attribute is "AMAZON".
 *
 * @param string $amazon new ID, or false to only read the current one
 */
public function Amazon($amazon=false){
    $scheme = 'AMAZON';
    return $this->getset('dc:identifier',$amazon,'opf:scheme',$scheme);
}
/**
 * Set or get the Calibre UUID of the book
 *
 * Stored as the dc:identifier whose opf:scheme attribute is "calibre".
 *
 * @param string $uuid new UUID, or false to only read the current one
 */
public function Calibre($uuid=false){
    $scheme = 'calibre';
    return $this->getset('dc:identifier',$uuid,'opf:scheme',$scheme);
}
/**
 * Set or get the series of the book
 *
 * Stored in the content attribute of the opf:meta element whose name is
 * "calibre:series".
 *
 * @param string $serie new series name, or false to only read the current one
 */
public function Serie($serie=false){
    $metaName = 'calibre:series';
    return $this->getset('opf:meta',$serie,'name',$metaName,'content');
}
/**
 * Set or get the series index of the book
 *
 * Stored in the content attribute of the opf:meta element whose name is
 * "calibre:series_index".
 *
 * @param string $serieIndex new index, or false to only read the current one
 */
public function SerieIndex($serieIndex=false){
    $metaName = 'calibre:series_index';
    return $this->getset('opf:meta',$serieIndex,'name',$metaName,'content');
}
/**
 * Set or get the book's subjects (aka. tags)
 *
 * Subjects should be given as an array, but a comma separated string is
 * also accepted.
 *
 * @param array|string|bool $subjects new subjects, or false to only read
 * @return array list of the current subjects
 */
public function Subjects($subjects=false){
    // setter
    if($subjects !== false){
        if(is_string($subjects)){
            $subjects = ($subjects === '') ? array() : array_map('trim', explode(',',$subjects));
        }

        // delete previous
        foreach($this->xpath->query('//opf:metadata/dc:subject') as $old){
            $old->delete();
        }

        // add new ones
        $metadata = $this->xpath->query('//opf:metadata')->item(0);
        foreach($subjects as $subj){
            $metadata->appendChild($this->xml->createElement('dc:subject',htmlspecialchars($subj)));
        }

        $this->reparse();
    }

    // getter
    $found = array();
    foreach($this->xpath->query('//opf:metadata/dc:subject') as $node){
        $found[] = $node->nodeValue;
    }
    return $found;
}
/**
 * Read the cover data
 *
 * Returns an associative array with the following keys:
 *
 *   mime  - filetype (usually image/jpeg)
 *   data  - the binary image data
 *   found - the internal path, or false if no image is set in epub
 *
 * When no image is set in the epub file, the binary data for a transparent
 * GIF pixel is returned.
 *
 * When adding a new image this function returns no or old data because the
 * image contents are not in the epub file, yet. The image will be added when
 * the save() method is called.
 *
 * @param  string $path local filesystem path to a new cover image
 * @param  string $mime mime type of the given file
 * @return array
 * @throws Exception if the epub file cannot be opened for reading the image
 */
public function Cover($path=false, $mime=false){
    // set cover
    if($path !== false){
        // remove current pointer
        $nodes = $this->xpath->query('//opf:metadata/opf:meta[@name="cover"]');
        foreach($nodes as $node) $node->delete();
        // remove previous manifest entries if they were made by us
        $nodes = $this->xpath->query('//opf:manifest/opf:item[@id="php-epub-meta-cover"]');
        foreach($nodes as $node) $node->delete();

        if($path){
            // add pointer
            $parent = $this->xpath->query('//opf:metadata')->item(0);
            $node = $parent->newChild('opf:meta');
            $node->attr('opf:name','cover');
            $node->attr('opf:content','php-epub-meta-cover');

            // add manifest
            $parent = $this->xpath->query('//opf:manifest')->item(0);
            $node = $parent->newChild('opf:item');
            $node->attr('id','php-epub-meta-cover');
            $node->attr('opf:href','php-epub-meta-cover.img');
            $node->attr('opf:media-type',$mime);

            // remember path for save action
            $this->imagetoadd = $path;
        }

        $this->reparse();
    }

    // load cover
    $nodes = $this->xpath->query('//opf:metadata/opf:meta[@name="cover"]');
    if(!$nodes->length) return $this->no_cover();
    $coverid = (string) $nodes->item(0)->attr('opf:content');
    if(!$coverid) return $this->no_cover();

    $nodes = $this->xpath->query('//opf:manifest/opf:item[@id="'.$coverid.'"]');
    if(!$nodes->length) return $this->no_cover();
    $mime = $nodes->item(0)->attr('opf:media-type');
    $path = $nodes->item(0)->attr('opf:href');
    $path = dirname('/'.$this->meta).'/'.$path; // image path is relative to meta file
    $path = ltrim($path,'/');

    $zip = new ZipArchive();
    // open() reports failure with an integer error code, which is truthy:
    // only an identical-to-true result means the archive is open
    if($zip->open($this->file) !== true){
        throw new Exception('Failed to read epub file');
    }
    $data = $zip->getFromName($path);
    // release the handle opened just for this read
    $zip->close();

    return array(
        'mime'  => $mime,
        'data'  => $data,
        'found' => $path
    );
}
/**
 * Find the manifest item that the cover pointer refers to.
 *
 * @return EPubDOMElement|null the opf:item whose id matches the content of
 *                             the "cover" meta element, or NULL when the
 *                             pointer, its content or the item is missing
 */
public function getCoverItem () {
    $pointers = $this->xpath->query('//opf:metadata/opf:meta[@name="cover"]');
    if($pointers->length){
        $coverid = (String) $pointers->item(0)->attr('opf:content');
        if($coverid){
            $items = $this->xpath->query('//opf:manifest/opf:item[@id="'.$coverid.'"]');
            if($items->length){
                return $items->item(0);
            }
        }
    }
    return NULL;
}
/**
 * Set the "properties" attribute of the cover manifest item to
 * "cover-image". Does nothing when the book has no cover item.
 */
public function updateForKepub () {
    $cover = $this->getCoverItem ();
    if (is_null ($cover)) {
        return;
    }
    $cover->attr('opf:properties', 'cover-image');
}
/**
 * Replace the image of an existing cover item.
 *
 * Resolves the archive path of the current cover into $this->coverpath.
 * When $path is given, the image is remembered for the save action and the
 * manifest item's media type is updated to $mime if one was supplied.
 * Books without a cover item are not modified.
 *
 * @param string|bool $path local filesystem path to a new cover image, or false to only resolve the cover path
 * @param string|bool $mime mime type of the given file, or false to keep the current one
 * @return array|null the no_cover() response when the book has no cover
 *                    and no update was requested, otherwise nothing
 */
public function Cover2($path=false, $mime=false){
    $item = $this->getCoverItem ();
    $hascover = !is_null ($item);
    if ($hascover) {
        $this->coverpath = $item->attr('opf:href');
        $this->coverpath = dirname('/'.$this->meta).'/'.$this->coverpath; // image path is relative to meta file
        $this->coverpath = ltrim($this->coverpath,'\\');
        $this->coverpath = ltrim($this->coverpath,'/');
    }

    // set cover
    if($path !== false){
        if (!$hascover) return; // TODO For now only update

        if($path){
            // apply the caller's type; it must not be replaced by the
            // manifest's old value before being written back
            if($mime !== false){
                $item->attr('opf:media-type',$mime);
            }

            // remember path for save action
            $this->imagetoadd = $path;
        }

        $this->reparse();
    }

    if (!$hascover) return $this->no_cover();
}
/**
 * A simple getter/setter for simple meta attributes
 *
 * It should only be used for attributes that are expected to be unique
 *
 * Passing false as $value only reads. Passing an empty string removes the
 * matching node(s). Any other value, including "0", is stored.
 *
 * @param string      $item  XML node to set/get
 * @param string|bool $value New node value
 * @param string|bool $att   Attribute name
 * @param string|bool $aval  Attribute value
 * @param string|bool $datt  Destination attribute
 * @return string current value, or '' when no matching node exists
 */
protected function getset($item,$value=false,$att=false,$aval=false,$datt=false){
    // construct xpath
    $xpath = '//opf:metadata/'.$item;
    if($att){
        $xpath .= "[@$att=\"$aval\"]";
    }

    // set value
    if($value !== false){
        // element content is assigned through nodeValue and has to be
        // escaped; attribute values go through attr(), which hands them to
        // setAttribute()/setAttributeNS() and must stay unescaped or they
        // end up escaped twice
        $escaped = htmlspecialchars($value);
        $nodes = $this->xpath->query($xpath);
        if($nodes->length == 1 ){
            if($escaped === ''){
                // the user wants to empty this value -> delete the node
                $nodes->item(0)->delete();
            }else{
                // replace value
                if ($datt){
                    $nodes->item(0)->attr ($datt, $value);
                }else{
                    $nodes->item(0)->nodeValue = $escaped;
                }
            }
        }else{
            // if there are multiple matching nodes for some reason delete
            // them. we'll replace them all with our own single one
            foreach($nodes as $n) $n->delete();
            // re-add a single node; compare against '' so that "0" is kept,
            // as it is in the single-node branch above
            if($escaped !== ''){
                $parent = $this->xpath->query('//opf:metadata')->item(0);

                $node = $parent->newChild ($item);
                if($att) $node->attr($att,$aval);
                if ($datt){
                    $node->attr ($datt, $value);
                }else{
                    $node->nodeValue = $escaped;
                }
            }
        }

        $this->reparse();
    }

    // get value
    $nodes = $this->xpath->query($xpath);
    if($nodes->length){
        if ($datt){
            return $nodes->item(0)->attr ($datt);
        }else{
            return $nodes->item(0)->nodeValue;
        }
    }else{
        return '';
    }
}
/**
 * Return a not found response for Cover()
 *
 * @return array 'data' holds a base64-decoded GIF, 'mime' is image/gif and
 *               'found' is false
 */
protected function no_cover(){
    $response = array();
    $response['data'] = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    $response['mime'] = 'image/gif';
    $response['found'] = false;
    return $response;
}
/**
 * Reparse the DOM tree
 *
 * I had to rely on this because otherwise xpath failed to find the newly
 * added nodes
 */
protected function reparse() {
    $serialized = $this->xml->saveXML();
    $this->xml->loadXML($serialized);
    // the XPath helper is rebuilt for the reloaded document
    $this->xpath = new EPubDOMXPath($this->xml);
}
/**
 * XPath helper that registers the namespace prefixes declared by
 * EPubDOMElement whenever the document's root element is of that class.
 */
class EPubDOMXPath extends DOMXPath {
    public function __construct(DOMDocument $doc){
        parent::__construct($doc);

        $root = $doc->documentElement;
        if(!($root instanceof EPubDOMElement)){
            return;
        }
        foreach($root->namespaces as $prefix => $uri){
            $this->registerNamespace($prefix,$uri);
        }
    }
}
/**
 * DOMElement with helpers that understand the epub namespace prefixes
 * listed in $namespaces.
 */
class EPubDOMElement extends DOMElement {
    // prefix => namespace URI for the prefixes accepted by the helpers below
    public $namespaces = array(
        'n'   => 'urn:oasis:names:tc:opendocument:xmlns:container',
        'opf' => 'http://www.idpf.org/2007/opf',
        'dc'  => 'http://purl.org/dc/elements/1.1/'
    );

    /**
     * Create an element from a (possibly prefixed) name.
     *
     * The prefix is stripped from the name. When no namespace URI is passed,
     * it is looked up from the prefix. The value is escaped before it is
     * handed to DOMElement.
     *
     * @param string $name
     * @param string $value
     * @param string $namespaceURI
     */
    public function __construct($name, $value='', $namespaceURI=''){
        list($ns,$name) = $this->splitns($name);
        $value = htmlspecialchars($value);
        if(!$namespaceURI && $ns){
            // an unknown prefix yields an element without namespace
            $namespaceURI = isset($this->namespaces[$ns]) ? $this->namespaces[$ns] : '';
        }
        parent::__construct($name, $value, $namespaceURI);
    }

    /**
     * Create and append a new child
     *
     * Works with our epub namespaces and omits default namespaces
     *
     * @param string $name  element name, optionally with a known prefix
     * @param string $value element content
     * @return DOMNode the appended child
     */
    public function newChild($name, $value=''){
        list($ns,$local) = $this->splitns($name);
        // defined up front so that names without a prefix do not read an
        // unset variable below
        $nsuri = '';
        if($ns){
            $nsuri = isset($this->namespaces[$ns]) ? $this->namespaces[$ns] : '';
            if($nsuri && $this->isDefaultNamespace($nsuri)){
                $name  = $local;
                $nsuri = '';
            }
        }

        // this doesn't call the constructor: $node = $this->ownerDocument->createElement($name,$value);
        $node = new EPubDOMElement($name,$value,$nsuri);
        return $this->appendChild($node);
    }

    /**
     * Split given name in namespace prefix and local part
     *
     * @param  string $name
     * @return array  (namespace, name)
     */
    public function splitns($name){
        $list = explode(':',$name,2);
        if(count($list) < 2) array_unshift($list,'');
        return $list;
    }

    /**
     * Simple EPub namespace aware attribute accessor
     *
     * With $value null the attribute is read, with false it is removed, and
     * with any other value it is set. The namespace is dropped when it is
     * this element's own namespace or, for an element without namespace,
     * the default namespace.
     *
     * @param string $attr  attribute name, optionally with a known prefix
     * @param mixed  $value null to read, false to delete, anything else to set
     * @return string|null the attribute value when reading
     */
    public function attr($attr,$value=null){
        list($ns,$attr) = $this->splitns($attr);

        $nsuri = '';
        if($ns && isset($this->namespaces[$ns])){
            $nsuri = $this->namespaces[$ns];
            if(!$this->namespaceURI){
                if($this->isDefaultNamespace($nsuri)){
                    $nsuri = '';
                }
            }elseif($this->namespaceURI == $nsuri){
                $nsuri = '';
            }
        }

        if(!is_null($value)){
            if($value === false){
                // delete if false was given
                if($nsuri){
                    $this->removeAttributeNS($nsuri,$attr);
                }else{
                    $this->removeAttribute($attr);
                }
            }else{
                // modify if value was given
                if($nsuri){
                    $this->setAttributeNS($nsuri,$attr,$value);
                }else{
                    $this->setAttribute($attr,$value);
                }
            }
        }else{
            // return value if none was given
            if($nsuri){
                return $this->getAttributeNS($nsuri,$attr);
            }else{
                return $this->getAttribute($attr);
            }
        }
    }

    /**
     * Remove this node from the DOM
     *
     * A node that has no parent is left untouched.
     */
    public function delete(){
        if($this->parentNode){
            $this->parentNode->removeChild($this);
        }
    }
}