<?php

namespace Klantenvertellen;

use DomDocument;
use DOMXPath;
use stdClass;

/**
 * Scrapes paginated review pages and aggregates the reviews plus the summary
 * figures shown on the page into a single stdClass result.
 *
 * Pages are requested from `$base_url` with a fixed query string (see _getUrl())
 * and inspected through the XPath expressions declared below. A previously
 * returned result can be passed in as a cache: scraping stops at the first
 * review that equals the newest cached one and the cached reviews are appended.
 */
class Scraper {

	// XPath expressions. The per-review queries (author, date, rating, title)
	// are relative and are evaluated with a single review element as context.
	private $review_query = '//*[contains(@class,"review-list")]//*[@class="review"]';
	private $author_query = './/*[contains(@class,"name-city")]/text()';
	private $date_query = './/*[contains(@class,"created-date")]/@data-date';
	private $rating_query = './/*[contains(@class,"rating-number")]/span/text()';
	private $title_query = './/*[contains(@class,"title")]/h4/text()';
	private $average_query = '//*[contains(@class,"review-summary")]/*[contains(@class,"ratings-per-division")]//*[contains(@itemprop,"ratingValue")]/text()';
	private $num_reviews_query = '//*[contains(@class,"review-summary")]/*[contains(@class,"ratings-per-division")]//*[contains(@itemprop,"reviewCount")]/text()';
	private $last_year_average_query = '//*[contains(@class,"review-summary")]/*[contains(@class,"overall-rating")]/*[contains(@class,"rating-number")]/span/text()';
	// Unlike the others this expression yields a string, not a node set, so it
	// has to be run through DOMXPath::evaluate() rather than query().
	private $last_year_num_reviews_query = 'substring-before(//*[contains(@class,"review-summary")]/*[contains(@class,"overall-rating")]/*[contains(@class,"summary")]/*[contains(@class,"amount")]/text()," ")';

	/** @var string */
	private $base_url;

	/** @var stdClass|null */
	private $cached_result;

	/** @var DomDocument */
	private $dom;

	/** @var DOMXPath */
	private $xpath;

	/**
	 * @param string        $base_url      URL of the review page, without query string.
	 * @param stdClass|null $cached_result Result of an earlier fetchReviews() call, or null.
	 */
	public function __construct( $base_url, $cached_result = null ) {
		// Collect libxml parse errors internally instead of emitting PHP warnings.
		// Note that this switches a process-wide libxml setting.
		libxml_use_internal_errors( true );
		$this->base_url      = $base_url;
		$this->cached_result = $cached_result;
		$this->dom           = new DomDocument();
	}

	/**
	 * Walks the review pages from the first one until a page contains no
	 * reviews or the newest cached review is encountered.
	 *
	 * The summary figures are read from whichever page was loaded last.
	 *
	 * @return stdClass Object with `reviews`, `average`, `found_reviews`,
	 *                  `percentage_recommending` and the matching `last_year_*` fields.
	 */
	public function fetchReviews() {
		$cached_reviews = array();
		if ( ! is_null( $this->cached_result )
			&& isset( $this->cached_result->reviews )
			&& is_array( $this->cached_result->reviews )
		) {
			$cached_reviews = $this->cached_result->reviews;
		}

		// An empty cache has no "newest" review to stop at.
		$last_cached = isset( $cached_reviews[0] ) ? $cached_reviews[0] : null;

		$reviews = array();
		$page    = 1;

		while ( ( $count = $this->_countReviews( $page ) ) > 0 ) {
			for ( $i = 1; $i <= $count; $i ++ ) {
				$review = $this->_fetchReview( $i );
				// Loose object comparison: equal when all properties compare equal.
				if ( null !== $last_cached && $review == $last_cached ) {
					break 2;
				}

				$reviews[] = $review;
			}

			$page ++;
		}

		$result          = new stdClass();
		$result->reviews = array_merge( $reviews, $cached_reviews );

		$result->average                 = $this->_getAverage();
		$result->found_reviews           = $this->_getNumReviews();
		$result->percentage_recommending = $this->_getPercentageRecommending( $result->reviews );

		$result->last_year_average                 = $this->_getLastYearAverage();
		$result->last_year_found_reviews           = $this->_getLastYearNumReviews();
		$result->last_year_percentage_recommending = $this->_getLastYearPercentageRecommending( $result->reviews );

		return $result;
	}

	/**
	 * Loads the given page and counts the review elements on it.
	 *
	 * @param int $page One-based page number.
	 *
	 * @return int Number of reviews found; 0 when the page could not be loaded.
	 */
	private function _countReviews( $page ) {
		$this->_fetchPage( $page );

		return intval( $this->xpath->evaluate( 'count(' . $this->review_query . ')' ) );
	}

	/**
	 * Loads a page into the DOM and rebuilds the XPath helper for it.
	 *
	 * @param int $page One-based page number.
	 *
	 * @return bool Whether the page was loaded.
	 */
	private function _fetchPage( $page ) {
		// "@" keeps stream/HTTP warnings quiet; failure is handled via the return value.
		$loaded = @$this->dom->loadHTMLFile( $this->_getUrl( $page ) );

		if ( ! $loaded ) {
			// Start from an empty document so the previous page's reviews are
			// never counted again, which would keep the paging loop running.
			$this->dom = new DomDocument();
		}

		// Parse errors are buffered because of libxml_use_internal_errors(); drop
		// them so the buffer does not grow with every page.
		libxml_clear_errors();

		$this->xpath = new DOMXPath( $this->dom );

		return (bool) $loaded;
	}

	/**
	 * Builds the URL for a page; the remote `pageNumber` parameter is zero-based.
	 *
	 * @param int $page One-based page number.
	 *
	 * @return string
	 */
	private function _getUrl( $page ) {
		return $this->base_url . '?lang=nl&limit=25&pageNumber=' . ( $page - 1 );
	}

	/**
	 * Converts the n-th review element of the current page.
	 *
	 * @param int $num One-based position of the review on the page.
	 *
	 * @return stdClass
	 */
	private function _fetchReview( $num ) {
		$review = $this->xpath->evaluate( '(' . $this->review_query . ')[' . $num . ']' )->item( 0 );

		return $this->_convertReview( $review );
	}

	/**
	 * Extracts author, date, rating, title and the answered questions from a
	 * review element.
	 *
	 * @param \DOMNode $element Review element used as XPath context.
	 *
	 * @return stdClass Review with `author`, `date` (strtotime() result), `rating`
	 *                  (float), `title`, `recommend` (bool|null), `reason`, `text`.
	 */
	private function _convertReview( $element ) {
		$review            = new stdClass();
		$review->author    = $this->_getElementValue( $element, $this->author_query );
		$date              = $this->_getElementValue( $element, $this->date_query );
		$review->date      = strtotime( $date );
		$rating            = $this->_getElementValue( $element, $this->rating_query );
		$review->rating    = $this->_toFloat( $rating );
		$review->title     = $this->_getElementValue( $element, $this->title_query );
		$review->recommend = null;
		$review->reason    = '';
		$review->text      = '';

		$answered_questions = $this->_answeredQuestions( $element );
		for ( $i = 1; $i <= $answered_questions; $i ++ ) {
			list( $question, $answer ) = $this->_fetchQuestion( $element, $i );

			switch ( $question ) {
				case 'Aanbevelen?':
					$review->recommend = $answer === 'Ja';
					break;
				case 'Beschrijf de ervaringen met dit bedrijf':
					$review->text = $answer;
					break;
				case 'Reden bezoek':
					$review->reason = $answer;
					break;
				default:
					break;
			}
		}

		// Fall back to the "opinion" element when no question supplied the text.
		if ( empty( $review->text ) ) {
			$review->text = $this->_getElementValue( $element, './/*[contains(@class,"opinion")]/text()' );
		}

		return $review;
	}

	/**
	 * Returns the trimmed value of the first node matching $query below $element.
	 *
	 * @param \DOMNode $element Context node.
	 * @param string   $query   XPath expression returning nodes.
	 *
	 * @return string Empty string when nothing matches.
	 */
	private function _getElementValue( $element, $query ) {
		return $this->_firstNodeValue( $query, $element );
	}

	/**
	 * Null-safe lookup of the first matching node's trimmed value.
	 *
	 * @param string        $query   XPath expression returning nodes.
	 * @param \DOMNode|null $context Optional context node.
	 *
	 * @return string Empty string when the query fails or matches nothing.
	 */
	private function _firstNodeValue( $query, $context = null ) {
		$nodes = is_null( $context )
			? $this->xpath->query( $query )
			: $this->xpath->query( $query, $context );

		if ( false === $nodes ) {
			return '';
		}

		$node = $nodes->item( 0 );
		if ( is_null( $node ) ) {
			return '';
		}

		return trim( strval( $node->nodeValue ) );
	}

	/**
	 * Parses a number that uses a decimal comma (e.g. "8,7").
	 *
	 * @param string $value
	 *
	 * @return float
	 */
	private function _toFloat( $value ) {
		return floatval( str_replace( ',', '.', $value ) );
	}

	/**
	 * Counts the question wrappers inside a review element.
	 *
	 * @param \DOMNode $element Review element.
	 *
	 * @return int
	 */
	private function _answeredQuestions( $element ) {
		return intval( $this->xpath->evaluate( 'count(.//*[contains(@class,"question-wrapper")])', $element ) );
	}

	/**
	 * Reads the n-th question and its answer from a review element.
	 *
	 * @param \DOMNode $element      Review element.
	 * @param int      $num_question One-based question position.
	 *
	 * @return array Two trimmed strings: question, answer ('' when missing).
	 */
	private function _fetchQuestion( $element, $num_question ) {
		$wrapper = '(.//*[contains(@class,"question-wrapper")])[' . $num_question . ']';

		return array(
			$this->_firstNodeValue( $wrapper . '/*[contains(@class,"question")]/text()', $element ),
			$this->_firstNodeValue( $wrapper . '/*[contains(@class,"rating")]/text()', $element ),
		);
	}

	/**
	 * @return float Overall average from the summary; 0.0 when absent.
	 */
	private function _getAverage() {
		return $this->_toFloat( $this->_firstNodeValue( $this->average_query ) );
	}

	/**
	 * @return int Review count from the summary; 0 when absent.
	 */
	private function _getNumReviews() {
		return intval( $this->_firstNodeValue( $this->num_reviews_query ) );
	}

	/**
	 * Counts the reviews whose `recommend` flag is truthy.
	 *
	 * NOTE(review): despite the name this returns a count, not a percentage;
	 * kept as-is because callers may rely on it.
	 *
	 * @param array $reviews
	 *
	 * @return int
	 */
	private function _getPercentageRecommending( $reviews ) {
		return count( array_filter( $reviews, function ( $review ) {
			return $review->recommend;
		} ) );
	}

	/**
	 * @return float Average from the "overall-rating" summary; 0.0 when absent.
	 */
	private function _getLastYearAverage() {
		return $this->_toFloat( $this->_firstNodeValue( $this->last_year_average_query ) );
	}

	/**
	 * Reads the leading number of the "amount" text in the overall-rating summary.
	 *
	 * @return int 0 when the text is absent or does not start with a number.
	 */
	private function _getLastYearNumReviews() {
		// The expression is string-valued, so query() would return no nodes.
		$amount = $this->xpath->evaluate( $this->last_year_num_reviews_query );

		return intval( trim( strval( $amount ) ) );
	}

	/**
	 * Counts the recommending reviews dated within the last year.
	 *
	 * NOTE(review): returns a count, not a percentage (see _getPercentageRecommending()).
	 *
	 * @param array $reviews
	 *
	 * @return int
	 */
	private function _getLastYearPercentageRecommending( $reviews ) {
		// Computed once so every review is compared against the same instant.
		$threshold = strtotime( '-1 year' );

		return count( array_filter( $reviews, function ( $review ) use ( $threshold ) {
			return $review->recommend && $review->date >= $threshold;
		} ) );
	}
}
