<?php
/**
 * Copyright Blackbit digital Commerce GmbH <info@blackbit.de>
 *
 * This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

namespace Blackbit\PimBundle\lib\Pim\Parser;

use Psr\Log\LoggerInterface;

/**
 * Parses XML files according to the given configuration.
 *
 * Configuration keys read by this class:
 *  - "file":      source passed to getFileOrUrl()
 *  - "itemxpath": XPath expression selecting one node per item
 *  - "fields":    map of field key => array with an "xpath" entry (evaluated
 *                 relative to the item node) and an optional "multiValues" flag
 */
class XmlParser implements Parser {
    use ResourceBasedParser;

    /** @var array */
    private $config;

    /** @var LoggerInterface */
    private $logger;

    /**
     * @param array           $config Parser configuration (see class description)
     * @param LoggerInterface $logger Receives warnings and errors raised while parsing
     */
    public function __construct(array $config, LoggerInterface $logger)
    {
        $this->config = $config;
        $this->logger = $logger;
    }

    /**
     * Parses the configured XML source and extracts one entry per node matched by "itemxpath".
     *
     * Problems (unresolvable source, missing configuration, unreadable XML) are logged
     * and answered with an empty array instead of being thrown.
     *
     * When no limit is given the source is handed to archive() after parsing.
     *
     * @param integer $limit Maximum number of entries to return; an empty value means "no limit"
     * @return array Entries read from the file (at most $limit if a limit is given). Each entry
     *               maps a field key to its value; multi-value fields hold a serialize()d array.
     */
    public function parse($limit = null) {
        $source = $this->config['file'];

        try {
            // getFileOrUrl() is not defined in this class (presumably supplied by ResourceBasedParser)
            $filePathOrUrl = $this->getFileOrUrl($source);
        } catch (\InvalidArgumentException $e) {
            $this->logger->warning($e->getMessage());
            return array();
        }

        if (empty($this->config['itemxpath'])) {
            $this->logger->error("No itemxpath given in config");
            return array();
        }

        if (empty($this->config['fields'])) {
            $this->logger->error("No field xpaths given in config");
            return array();
        }

        $items = array();
        try {
            /** @var \DOMDocument $dom */
            $dom = $this->getDomDocument($filePathOrUrl);

            if ($dom === null) {
                return [];
            }

            // Strip the default namespace so the configured XPath expressions can use
            // unprefixed element names. getAttributeNode() yields no object when the
            // root element declares no default namespace, so only remove it if present.
            $root = $dom->documentElement;
            $defaultNamespace = $root->getAttributeNode("xmlns");
            if (is_object($defaultNamespace)) {
                $root->removeAttributeNS($defaultNamespace->nodeValue, "");
            }

            // Re-parse the serialized document so the already-built nodes
            // lose their namespace binding as well.
            $dom->loadXML($dom->saveXML($dom));

            $xp = new \DOMXPath($dom);
            $rawItems = $xp->query($this->config['itemxpath']);

            $i = 0;
            foreach ($rawItems as $rawItem) {
                // Loose comparison on purpose: null and 0 both mean "no limit"
                if ($limit != null && $i >= $limit) {
                    break;
                }

                $items[] = $this->extractItem($xp, $rawItem);
                $i++;
            }
        } catch (\Exception $e) {
            $this->logger->error("Unable to parse XML file \"{$source}\": " . $e);
            return array();
        }

        // Only archive on a full (unlimited) run
        if (empty($limit)) {
            $this->archive($filePathOrUrl, $source);
        }

        return $items;
    }

    /**
     * Evaluates every configured field XPath relative to one item node.
     *
     * @param \DOMXPath $xp      XPath evaluator bound to the parsed document
     * @param mixed     $rawItem Node matched by "itemxpath"
     * @return array Field key => extracted value. "" when no xpath is configured or the
     *               query could not be evaluated, null when a single-value xpath matched nothing.
     */
    private function extractItem(\DOMXPath $xp, $rawItem) {
        $item = array();

        foreach ($this->config['fields'] as $key => $values) {
            if (empty($values['xpath'])) {
                $item[$key] = "";
                continue;
            }

            try {
                $value = $xp->query($values['xpath'], $rawItem);

                if (!($value instanceof \DOMNodeList)) {
                    $item[$key] = "";
                    continue;
                }

                // "multiValues" is optional; only a strict true enables it
                if (isset($values['multiValues']) && $values['multiValues'] === true) {
                    $itemValues = array();
                    /** @var \DOMNode $node */
                    foreach ($value as $node) {
                        $itemValues[] = $node->hasChildNodes()
                            ? self::xml_to_array($node)
                            : $node->nodeValue;
                    }
                    $item[$key] = serialize($itemValues);
                } else {
                    // item(0) is null when the xpath matched nothing
                    $firstNode = $value->item(0);
                    $item[$key] = $firstNode !== null ? $firstNode->nodeValue : null;
                }
            } catch (\Exception $ex) {
                $this->logger->error("Unable to execute xpath: " . $ex);
                // Keep the row shape consistent with the other "no value" paths
                $item[$key] = "";
            }
        }

        return $item;
    }

    /**
     * Loads the given file path or URL into a DOMDocument.
     *
     * @param string $source File path or URL to load
     * @return \DOMDocument|null The loaded document, or null (after logging an error) if loading failed
     */
    protected function getDomDocument($source) {
        // Bug-Workaround: See https://pyd.io/f/topic/failed-to-load-external-entity-boot-confmanifest-xml/page/3/#post-72211
        // and https://bugs.php.net/bug.php?id=64938
        // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so it is only called on older versions.
        if (\PHP_VERSION_ID < 80000) {
            libxml_disable_entity_loader(false);
        }

        $dom = new \DOMDocument();
        if (!@$dom->load($source)) {
            // libxml_get_last_error() returns false when libxml recorded no error
            $error = \libxml_get_last_error();
            $reason = $error instanceof \LibXMLError ? $error->message : '';
            $this->logger->error('Could not read "'.$source.'". '.$reason);
            return null;
        }

        return $dom;
    }

    /**
     * Recursively converts a DOM node into a nested PHP structure.
     *
     * - Attributes are collected under the "@attributes" key.
     * - A node whose only child is a text/CDATA node yields that text directly,
     *   or an array with "_value" (plus "@attributes") when it has attributes.
     * - Child nodes are keyed by node name; repeated names are grouped into a list.
     * - Whitespace-only text nodes between children are skipped.
     *
     * @param \DOMNode $root Node to convert
     * @return array|string Converted structure, or the plain text for attribute-less text-only nodes
     */
    private static function xml_to_array(\DOMNode $root) {
        $result = array();

        if ($root->hasAttributes()) {
            foreach ($root->attributes as $attr) {
                $result['@attributes'][$attr->name] = $attr->value;
            }
        }

        if (!$root->hasChildNodes()) {
            return $result;
        }

        $children = $root->childNodes;
        if ($children->length == 1) {
            $child = $children->item(0);
            if (in_array($child->nodeType, [XML_TEXT_NODE, XML_CDATA_SECTION_NODE], true)) {
                $result['_value'] = $child->nodeValue;

                // Without attributes "_value" is the only key: return the bare text
                return count($result) == 1
                    ? $result['_value']
                    : $result;
            }
        }

        // Names that have already been turned into a list of siblings
        $groups = array();
        foreach ($children as $child) {
            // Compare against '' instead of using empty(): empty("0") is true and
            // would wrongly treat a "0" text node as whitespace.
            if ($child->nodeType == XML_TEXT_NODE && trim($child->nodeValue) === '') {
                continue;
            }

            $name = $child->nodeName;
            if (!isset($result[$name])) {
                $result[$name] = self::xml_to_array($child);
                continue;
            }

            if (!isset($groups[$name])) {
                // Second occurrence: wrap the first value so further siblings can be appended
                $result[$name] = array($result[$name]);
                $groups[$name] = 1;
            }
            $result[$name][] = self::xml_to_array($child);
        }

        return $result;
    }
}
