/home/othynbht/public_html/wp-content/plugins/siteseo-pro/main/importschema.php
<?php
/*
* SITESEO
* https://siteseo.io
* (c) SiteSEO Team
*/

namespace SiteSEOPro;

// Refuse to run when the file is requested directly instead of being loaded by WordPress.
if(defined('ABSPATH') === false){
	exit('HACKING ATTEMPT!');
}

class ImportSchema{

	/**
	 * Largest HTML document, in bytes, that will be scanned for schemas.
	 */
	const MAX_HTML_BYTES = 10000000;

	/**
	 * Extract schema from a remote URL
	 * 
	 * The URL is validated, fetched over HTTP and the returned HTML is handed
	 * to extract_schemas_from_html().
	 * 
	 * @param string $schema_url The URL to fetch schema from
	 * @return array|\WP_Error Array of schemas or WP_Error on failure
	 */
	static function extract_schema_from_url($schema_url){
		// Validate URL format
		if(!wp_http_validate_url($schema_url)){
			return new \WP_Error('invalid_url', __('Invalid URL provided', 'siteseo-pro'));
		}

		// The URL is user supplied, so use the "safe" request variant: it also
		// validates every redirect target, not only the initial URL.
		// The download is capped one byte above the parsing limit so that an
		// oversized page is still reported as such by extract_schemas_from_html()
		// without having to download it completely.
		$response = wp_safe_remote_get($schema_url, [
			'timeout' => 30,
			'user-agent' => 'SiteSEO-PRO-Schema-Importer/1.0',
			'limit_response_size' => self::MAX_HTML_BYTES + 1
		]);
		
		if(is_wp_error($response)){
			return $response;
		}

		$response_code = wp_remote_retrieve_response_code($response);
		if($response_code !== 200){
			return new \WP_Error('http_error', sprintf(__('Remote server returned error code: %d', 'siteseo-pro'), $response_code));
		}

		$html_body = wp_remote_retrieve_body($response);

		if(empty($html_body)){
			return new \WP_Error('empty_response', __('Remote server returned empty response', 'siteseo-pro'));
		}

		return self::extract_schemas_from_html($html_body);
	}

	/**
	 * Extract schemas from HTML content
	 * 
	 * Every <script type="application/ld+json"> element is located with a
	 * regular expression, its content is JSON decoded, sanitized and split
	 * into individual schema items. Script elements whose content is not
	 * valid JSON (or does not decode to an array) are skipped, so the
	 * returned array can be empty even though script elements were found.
	 * 
	 * @param string $html_body The HTML content to parse
	 * @return array|\WP_Error Array of extracted schemas, or WP_Error when the
	 *                         input is not a string, is too large, or contains
	 *                         no JSON-LD script element
	 */
	static function extract_schemas_from_html($html_body) {
		$schemas = [];
		
		// We do not want to handle HTML which is too large to handle
		if(!is_string($html_body) || strlen($html_body) > self::MAX_HTML_BYTES){
			return new \WP_Error('size_exceeded', __('The size of the HTML is over the allowed limit', 'siteseo-pro'));
		}

		preg_match_all('#<script[^>]+?type=[\'"]application/ld\+json[\'"][^>]*?>(.*?)</script>#is', $html_body, $matches);
		if(empty($matches[1])){
			return new \WP_Error('schema_not_found', __('No schema found in the content', 'siteseo-pro'));
		}

		foreach($matches[1] as $script_content){
			$decoded = json_decode(trim($script_content), true);
			
			// Ignore blocks that are not valid JSON or are plain scalars
			if(json_last_error() !== JSON_ERROR_NONE || !is_array($decoded)){
				continue;
			}

			$decoded = self::sanitize_schema_data($decoded);
			$schemas = array_merge($schemas, self::process_decoded_schema($decoded));
		}

		return $schemas;
	}

	/**
	 * Extract schema from JSON string
	 * 
	 * @param string $schema_json The JSON string to parse
	 * @return array|\WP_Error Array of schemas, or WP_Error when the input is
	 *                         not a string or decodes to an empty value
	 */
	static function extract_schema_from_json($schema_json){
		// json_decode() only accepts strings, anything else cannot hold a schema
		if(!is_string($schema_json)){
			return new \WP_Error('schema_not_found', __('Schema not found', 'siteseo-pro'));
		}

		$decoded = json_decode($schema_json, true);

		if(empty($decoded)){
			return new \WP_Error('schema_not_found', __('Schema not found', 'siteseo-pro'));
		}

		$decoded = self::sanitize_schema_data($decoded);

		return self::process_decoded_schema($decoded);
	}

	/**
	 * Process a decoded schema array and extract individual schema items
	 * 
	 * Three document layouts are handled:
	 *  - an object with an "@graph" array: every typed node of the graph,
	 *  - a single object with an "@type",
	 *  - a top-level list of such documents: each element is processed in turn.
	 * 
	 * Each returned item has the keys 'type' (the node's "@type"), 'schema'
	 * (the node wrapped in its own "@context"/"@graph" document) and
	 * 'json_ld' (that document encoded as pretty printed JSON).
	 * 
	 * @param array $decoded The decoded schema data
	 * @return array Array of processed schema items
	 */
	private static function process_decoded_schema($decoded){
		$schemas = [];

		if(!is_array($decoded)){
			return $schemas;
		}

		// A JSON-LD document may be a bare list of nodes/documents instead of
		// a single object; process every list element as its own document.
		if(!isset($decoded['@graph']) && !isset($decoded['@type'])){
			foreach($decoded as $key => $node){
				if(is_int($key) && is_array($node)){
					$schemas = array_merge($schemas, self::process_decoded_schema($node));
				}
			}

			return $schemas;
		}

		$context = !empty($decoded['@context']) ? $decoded['@context'] : 'https://schema.org/';
		
		// Handle both @graph arrays and single schema objects
		$items = [];
		if(is_array($decoded['@graph'] ?? null)){
			$items = $decoded['@graph'];
		} else if(isset($decoded['@type'])){
			$items = [$decoded];
		}
		
		foreach($items as $item){
			// Only typed nodes can be imported
			if(!is_array($item) || !isset($item['@type'])) continue;
			
			$final_schema = [
				'@context' => $context,
				'@graph'   => [$item]
			];
			
			$schemas[] = [
				'type'    => $item['@type'],
				'json_ld' => json_encode($final_schema, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES),
				'schema'  => $final_schema
			];
		}

		return $schemas;
	}

	/**
	 * Sanitize schema data to prevent XSS and other security issues
	 * 
	 * String values stored under the keys articleBody, text, description and
	 * headline are filtered with wp_kses_post(); every other string value is
	 * passed through sanitize_text_field(). Non-string values are left as is.
	 * 
	 * @param array $decoded The schema data to sanitize
	 * @return array Sanitized schema data, or an empty array when the input is not an array
	 */
	static function sanitize_schema_data($decoded){
		if(!is_array($decoded)){
			return [];
		}

		// Keys whose values are allowed to keep post-level HTML
		$target_keys = ['articleBody', 'text', 'description', 'headline'];

		// NOTE(review): sanitize_text_field() also strips percent-encoded octets,
		// which may alter URL values found in the schema - confirm this is acceptable.
		array_walk_recursive($decoded, static function(&$value, $key) use ($target_keys){
			if(!is_string($value)){
				return;
			}

			$value = in_array($key, $target_keys, true) ? wp_kses_post($value) : sanitize_text_field($value);
		});

		return $decoded;
	}
}