<?php

namespace yndenz\Plugins\Klantenvertellen;

use DomDocument;
use DOMNode;
use DOMXPath;
use Exception;
use stdClass;

/**
 * Scrapes review data (summary figures and individual reviews) from a klantenvertellen
 * review page using XPath queries on the downloaded HTML.
 *
 * An optional previously fetched result can be supplied; it is used both as a fallback
 * when scraping fails and to stop paging as soon as already known reviews are reached.
 */
class Scraper {

	// XPath expressions used to locate the individual pieces of data in the page markup.
	private $review_query = '//*[contains(@class,"review-list")]//*[@class="review"]';
	private $author_query = './/*[contains(@class,"name-city")]/text()';
	private $date_query = './/*[contains(@class,"created-date")]/@data-date';
	private $rating_query = './/*[contains(@class,"rating-number")]/span/text()';
	private $title_query = './/*[contains(@class,"title")]/h4/text()';
	private $average_query = '//*[contains(@class,"review-summary")]/*[contains(@class,"ratings-per-division")]//*[contains(@class,"rating")]/text()';
	private $total_num_reviews_query = '//*[contains(@class,"review-summary")]/*[contains(@class,"ratings-per-division")]//*[contains(@itemprop,"reviewCount")]/@content';
	private $last_year_average_query = '//*[contains(@class,"review-summary")]/*[contains(@class,"overall-rating")]/*[contains(@class,"rating-number")]/span/text()';
	private $last_year_num_reviews_query = '//*[contains(@class,"review-summary")]/*[contains(@class,"overall-rating")]/*[contains(@class,"summary")]/*[contains(@class,"amount")]/text()';

	/** @var string */
	private $base_url;

	/** @var Reviews|null */
	private $cached_result;

	/** @var DomDocument */
	private $dom;

	/** @var DOMXPath */
	private $xpath;

	/**
	 * @param string       $base_url      URL of the review page (paging parameters are appended).
	 * @param Reviews|null $cached_result Previously fetched result, if any.
	 */
	public function __construct( $base_url, $cached_result = null ) {
		// Collect libxml parse errors internally instead of raising a PHP warning for every
		// piece of invalid markup in the scraped page.
		libxml_use_internal_errors( true );
		$this->base_url      = $base_url;
		$this->cached_result = $cached_result;
		$this->dom           = new DomDocument();
	}

	/**
	 * Fetch the review summary and the list of reviews.
	 *
	 * On any failure a notification is sent (webhook first, e-mail as fallback) and the
	 * cached result is returned when one was supplied; without a cache the (possibly
	 * partially filled) new result is returned.
	 *
	 * @return Reviews
	 */
	public function fetchReviews() {
		$result = new Reviews();

		try {
			$this->_fetchPage( 1 );
			$reviews_per_page = $this->_countReviews();

			if ( $reviews_per_page === 0 ) {
				throw new Exception( __( 'Failed to fetch reviews', 'klantenvertellen' ) );
			}

			$num_reviews = $this->_getTotalNumReviews();

			$result->average                 = $this->_getAverage();
			$result->found_reviews           = $num_reviews;
			$result->last_year_average       = $this->_getLastYearAverage();
			$result->last_year_found_reviews = $this->_getLastYearNumReviews();
			$result->reviews                 = $this->_fetchReviews( $num_reviews, $reviews_per_page );
			$result->calculatePercentageRecommending();
			$result->calculateLastYearPercentageRecommending();
		} catch ( Exception $e ) {
			if ( ! self::_notify_yndenz( 'Fout in klantenvertellen plugin', $e->getMessage() ) ) {
				mail( 'developers@yndenz.com', 'Fout in klantenvertellen plugin', $e->getMessage() );
			}

			if ( ! is_null( $this->cached_result ) ) {
				return $this->cached_result;
			}
		}

		return $result;
	}

	/**
	 * Download and parse a page of reviews, replacing the current document and XPath context.
	 *
	 * @param int $page 1-based page number.
	 *
	 * @return bool False when the page could not be loaded.
	 */
	private function _fetchPage( $page ) {
		// "@" silences the PHP warning raised on network/HTTP failures; the outcome is
		// reported through the return value instead.
		$loaded = @$this->dom->loadHTMLFile( $this->_getUrl( $page ) );

		// With internal error handling enabled libxml keeps every parse error in memory
		// until it is cleared explicitly.
		libxml_clear_errors();

		$this->xpath = new DOMXPath( $this->dom );

		return $loaded !== false;
	}

	/**
	 * Build the URL of a page of reviews.
	 *
	 * @param int $page 1-based page number (the remote pageNumber parameter is 0-based).
	 *
	 * @return string
	 */
	private function _getUrl( $page ) {
		// Support base URLs that already carry a query string.
		$separator = strpos( $this->base_url, '?' ) === false ? '?' : '&';

		return $this->base_url . $separator . 'lang=nl&limit=25&pageNumber=' . ( $page - 1 );
	}

	/**
	 * Count number of reviews on the currently loaded page.
	 *
	 * @return int
	 */
	private function _countReviews() {
		return $this->xpath->query( $this->review_query )->count();
	}

	/**
	 * Fetch the overall average rating from the currently loaded page.
	 *
	 * @return float
	 * @throws Exception When the value is not present on the page.
	 */
	private function _getAverage() {
		$average_as_string = $this->_getFirstNodeValue( $this->average_query );
		if ( is_null( $average_as_string ) ) {
			throw new Exception( __( 'Failed to fetch average', 'klantenvertellen' ) );
		}

		// Ratings use a decimal comma.
		return floatval( str_replace( ',', '.', $average_as_string ) );
	}

	/**
	 * Fetch the total number of reviews from the currently loaded page.
	 *
	 * @return int
	 * @throws Exception When the value is not present on the page.
	 */
	private function _getTotalNumReviews() {
		$num_reviews_as_string = $this->_getFirstNodeValue( $this->total_num_reviews_query );
		if ( is_null( $num_reviews_as_string ) ) {
			throw new Exception( __( 'Failed to fetch num reviews', 'klantenvertellen' ) );
		}

		return intval( $num_reviews_as_string );
	}

	/**
	 * Fetch the average rating of the past 12 months from the currently loaded page.
	 *
	 * @return float
	 * @throws Exception When the value is not present on the page.
	 */
	private function _getLastYearAverage() {
		$average_as_string = $this->_getFirstNodeValue( $this->last_year_average_query );
		if ( is_null( $average_as_string ) ) {
			throw new Exception( __( 'Failed to fetch last year average', 'klantenvertellen' ) );
		}

		// Ratings use a decimal comma.
		return floatval( str_replace( ',', '.', $average_as_string ) );
	}

	/**
	 * Fetch the number of reviews of the past 12 months from the currently loaded page.
	 *
	 * @return int
	 * @throws Exception When the value is not present on the page.
	 */
	private function _getLastYearNumReviews() {
		$num_reviews_as_string = $this->_getFirstNodeValue( $this->last_year_num_reviews_query );
		if ( is_null( $num_reviews_as_string ) ) {
			throw new Exception( __( 'Failed to fetch last year num reviews', 'klantenvertellen' ) );
		}

		// intval() reads the leading digits and ignores any trailing text, so this handles
		// both "42 <label>" and a bare "42" (cutting at the first space returned 0 for the latter).
		return intval( $num_reviews_as_string );
	}

	/**
	 * Collect the reviews of all pages, starting with the page that is currently loaded.
	 *
	 * Paging stops as soon as the newest cached review is encountered; the cached reviews
	 * are then appended so the returned list is complete.
	 *
	 * @param int $num_reviews      Total number of reviews reported by the page.
	 * @param int $reviews_per_page Number of reviews found on the first page.
	 *
	 * @return array
	 */
	private function _fetchReviews( $num_reviews, $reviews_per_page ) {
		$page          = 1;
		$reviews       = array();
		$newest_cached = $this->_getNewestCachedReview();

		while ( true ) {
			// Always convert the page that is currently loaded before deciding whether
			// another page is needed; otherwise the last page would be dropped.
			if ( ! $this->_convertReviews( $reviews, $newest_cached ) ) {
				// Everything from here on is already known: reuse the cached reviews.
				return array_merge( $reviews, array_values( $this->cached_result->reviews ) );
			}

			if ( $page * $reviews_per_page >= $num_reviews ) {
				break;
			}

			// Stop when the next page cannot be loaded (the previous document might still
			// be in place) or contains no reviews.
			if ( ! $this->_fetchPage( ++ $page ) || $this->_countReviews() === 0 ) {
				break;
			}
		}

		return $reviews;
	}

	/**
	 * Get the newest (first) review from the cached result.
	 *
	 * @return Review|null Null when there is no cache or it holds no reviews.
	 */
	private function _getNewestCachedReview() {
		if ( is_null( $this->cached_result ) || ! property_exists( $this->cached_result, 'reviews' ) ) {
			return null;
		}

		$cached_reviews = $this->cached_result->reviews;
		if ( ! is_array( $cached_reviews ) || ! isset( $cached_reviews[0] ) ) {
			return null;
		}

		return $cached_reviews[0];
	}

	/**
	 * Convert the reviews on the currently loaded page and append them to $reviews.
	 *
	 * Conversion stops at the first review that equals the newest cached review; that
	 * review is not appended.
	 *
	 * @param Review[]             $reviews       List the converted reviews are appended to.
	 * @param Review|stdClass|null $newest_cached Newest cached review. Loose object comparison
	 *                                            is used, so only an object of the same class
	 *                                            with equal properties matches.
	 *
	 * @return bool False when the newest cached review was reached, true when the whole
	 *              page was converted.
	 */
	private function _convertReviews( &$reviews, $newest_cached = null ) {
		// Query once and walk the node list instead of re-evaluating the XPath per review.
		foreach ( $this->xpath->query( $this->review_query ) as $element ) {
			$review = $this->_convertReview( $element );
			if ( ! is_null( $newest_cached ) && $review == $newest_cached ) {
				return false;
			}

			$reviews[] = $review;
		}

		return true;
	}

	/**
	 * Fetch a specific review on the currently loaded page.
	 *
	 * @param int $num 1-based position of the review on the page.
	 *
	 * @return Review
	 */
	private function _fetchReview( $num ) {
		$review = $this->xpath->evaluate( '(' . $this->review_query . ')[' . intval( $num ) . ']' )->item( 0 );

		return $this->_convertReview( $review );
	}

	/**
	 * Convert a review element of the page into a Review object.
	 *
	 * @param DOMNode $element
	 *
	 * @return Review
	 */
	private function _convertReview( $element ) {
		$review         = new Review();
		$review->author = trim( preg_replace( '/\s\s+/', ' ', str_replace( ",", "uit", $this->_getElementValue( $element, $this->author_query ) ) ) );
		$date           = $this->_getElementValue( $element, $this->date_query );
		$review->date   = strtotime( $date );
		$rating         = $this->_getElementValue( $element, $this->rating_query );
		// Ratings use a decimal comma.
		$review->rating = floatval( str_replace( ',', '.', $rating ) );
		$review->title  = $this->_getElementValue( $element, $this->title_query );

		$answered_questions = $this->_answeredQuestions( $element );
		for ( $i = 1; $i <= $answered_questions; $i ++ ) {
			list( $question, $answer ) = $this->_fetchQuestion( $element, $i );

			switch ( $question ) {
				case 'Aanbevelen?':
					$review->recommend = $answer === 'Ja';
					break;
				case 'Beschrijf de ervaringen met dit bedrijf':
					$review->text = $answer;
					break;
				case 'Reden bezoek':
					$review->reason = $answer;
					break;
				default:
					break;
			}
		}

		// Fall back to the "opinion" element when no answer supplied the review text.
		if ( empty( $review->text ) ) {
			$review->text = $this->_getElementValue( $element, './/*[contains(@class,"opinion")]/text()' );
		}

		return $review;
	}

	/**
	 * Get the trimmed value of the first node matching the query, relative to the given node.
	 *
	 * @param DOMNode $element
	 * @param string  $query
	 *
	 * @return string Empty string when nothing matches.
	 */
	private function _getElementValue( $element, $query ) {
		$value = $this->_getFirstNodeValue( $query, $element );

		return is_null( $value ) ? "" : $value;
	}

	/**
	 * Run an XPath query once and return the trimmed value of the first match.
	 *
	 * Unlike an empty() check on the value this keeps legitimate values such as "0".
	 *
	 * @param string       $query
	 * @param DOMNode|null $element Context node; the document is used when omitted.
	 *
	 * @return string|null Null when the query matches nothing.
	 */
	private function _getFirstNodeValue( $query, $element = null ) {
		$nodes = is_null( $element ) ? $this->xpath->query( $query ) : $this->xpath->query( $query, $element );
		$node  = $nodes ? $nodes->item( 0 ) : null;

		if ( is_null( $node ) ) {
			return null;
		}

		return trim( strval( $node->nodeValue ) );
	}

	/**
	 * Count the question blocks inside a review element.
	 *
	 * @param DOMNode $element
	 *
	 * @return int
	 */
	private function _answeredQuestions( $element ) {
		return intval( $this->xpath->evaluate( 'count(.//*[contains(@class,"question-wrapper")])', $element ) );
	}

	/**
	 * Fetch one question and its answer from a review element.
	 *
	 * @param DOMNode $element
	 * @param int     $num_question 1-based position of the question within the review.
	 *
	 * @return array Two strings: the question and the answer (empty string when missing).
	 */
	private function _fetchQuestion( $element, $num_question ) {
		$wrapper = '(.//*[contains(@class,"question-wrapper")])[' . intval( $num_question ) . ']';

		return array(
			$this->_getElementValue( $element, $wrapper . '/*[contains(@class,"question")]/text()' ),
			$this->_getElementValue( $element, $wrapper . '/*[contains(@class,"rating")]/text()' ),
		);
	}

	/**
	 * Send errors to the yndenz notification channel.
	 *
	 * @param string $subject
	 * @param string $message
	 *
	 * @return bool True only when the webhook confirmed delivery.
	 */
	private static function _notify_yndenz( $subject, $message ) {
		if ( ! function_exists( 'curl_init' ) ) {
			return false;
		}

		// NOTE(review): the webhook token is hard-coded in source; consider moving it to configuration.
		$ch = curl_init( 'https://notify.yndenz.com/hooks/vcpYpAqJPbPPpntQF/YHouJ2oDSNAPStAnGQkS9e6zwCa8yz8tHDM23guG8enq9xKq' );
		if ( $ch === false ) {
			return false;
		}

		curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, 'POST' );
		curl_setopt( $ch, CURLOPT_POSTFIELDS, http_build_query( array(
			'username'    => get_bloginfo( 'name' ),
			'text'        => $subject,
			'attachments' => array(
				array(
					'title' => date( 'Y-m-d H:i:s' ),
					'text'  => $message,
				)
			)
		) ) );
		curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
		// Certificate verification is left at its secure default; a failed request simply
		// makes the caller fall back to e-mail.
		// Error reporting must never hang the page that triggered it.
		curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, 5 );
		curl_setopt( $ch, CURLOPT_TIMEOUT, 10 );
		$response = curl_exec( $ch );
		curl_close( $ch );

		if ( empty( $response ) ) {
			return false;
		}

		$result = json_decode( $response, true );

		return is_array( $result ) && ! empty( $result['success'] );
	}
}
