engine/public/javascripts/admin/aloha/plugins/com.gentics.aloha.plugins.LinkChecker/proxy.php

236 lines
6.7 KiB
PHP

<?php
/**
* Gentics Aloha Editor AJAX Gateway
* Copyright (c) 2010 Gentics Software GmbH
* Licensed under the terms of http://www.aloha-editor.com/license.html
* aloha-sales@gentics.com
* Author Haymo Meran h.meran@gentics.com
* Author Johannes Schüth j.schuet@gentics.com
* Author Tobias Steiner t.steiner@gentics.com
*
* Testing from the command line:
* function getallheaders(){return array('X-Gentics' => 'X');};
* https url example: https://google.com/adsense
*
*/
// for debugging
//$_SERVER['SERVER_PROTOCOL'] = 'HTTP/1.0';
//$_SERVER['REQUEST_METHOD'] = 'HEAD';
//error_reporting(E_ALL);
// Describe the incoming client request so that it can be replayed against
// the remote URL by http_request().
$request = array(
    'method'   => $_SERVER['REQUEST_METHOD'],
    'protocol' => $_SERVER['SERVER_PROTOCOL'],
    'headers'  => getallheaders(),
    //possibly use $HTTP_RAW_POST_DATA if available
    'payload'  => http_build_query($_POST),
);

// read url parameter; anything that is not a plain string (e.g. "?url[]=x")
// is treated like a missing parameter instead of being passed to urldecode()
if (array_key_exists('url', $_GET) && is_string($_GET['url'])) {
    // NOTE(review): PHP has already URL-decoded $_GET once, so this decodes
    // a second time. Kept as-is because the calling script may rely on it
    // (double encoding) -- confirm against the client code.
    // NOTE(review): the target host is entirely caller-controlled, which
    // makes this script an open proxy; consider restricting allowed targets.
    $request['url'] = urldecode($_GET['url']);
} else {
    header("HTTP/1.0 400 Bad Request");
    echo "Gentics AJAX Gateway failed because parameter url is missing.";
    exit();
}

// check if link exists
$response = http_request($request);

// Note HEAD does not always work even if specified...
// We use HEAD for Linkchecking so we do a 2nd request.
if (strtoupper($request['method']) == 'HEAD' && (int)$response['status'] >= 400) {
    // the first connection is no longer needed, release it before retrying
    fclose($response['socket']);

    $request['method'] = 'GET';
    $response = http_request($request);

    //since we handle a HEAD, we don't need to proxy any contents
    fclose($response['socket']);
    $response['socket'] = null;
}

// forward each returned header...
foreach ($response['headers'] as $key => $value) {
    if ( ! is_string($key)) {
        // http_parse_headers() stores the raw status line under the integer
        // key 0. Passing it to header() unchanged makes PHP adopt the remote
        // status code instead of emitting a meaningless "0: ..." header.
        if (preg_match('|^HTTP/\d|i', $value)) {
            header($value);
        }
        continue;
    }
    if (strtolower($key) == 'content-length') {
        //there is no need to specify a content length since we don't do keep
        //alive, and this can cause problems for integration (e.g. gzip output,
        //which would change the content length)
        continue;
    }
    header("$key: $value");
}

// output the contents if any
if (null !== $response['socket']) {
    fpassthru($response['socket']);
    fclose($response['socket']);
}
exit;
/**
* Query an HTTP(S) URL with the given request parameters and return the
* response headers and status code. The socket is returned as well and
* will point to the beginning of the response payload (after all headers
* have been read), and must be closed with fclose().
*
* Any failure (unusable URL, connection error, unreadable response) is
* reported through myErrorHandler(), which terminates the script.
*
* @param $request array describing the request to send, with the keys
*                 'url' (absolute target URL), 'method', 'protocol',
*                 'headers' (name => value) and 'payload' (request body).
* @param $timeout connection and read timeout in seconds
* @return array with the keys 'headers' (as returned by
*                 http_parse_headers()), 'socket' (open stream positioned
*                 after the response headers) and 'status' (status code as
*                 a numeric string).
*/
function http_request($request, $timeout = 5) {
    $url = $request['url'];
    $payload = (string) $request['payload'];

    // Extract the hostname from url. parse_url() returns false for
    // seriously malformed URLs, so make sure we really got an array.
    $parts = parse_url($url);
    if ( ! is_array($parts) || ! array_key_exists('host', $parts)) {
        return myErrorHandler("url ($url) has no host. Is it relative?");
    }
    if ( ! array_key_exists('scheme', $parts)) {
        return myErrorHandler("url ($url) has no scheme.");
    }
    $remote = $parts['host'];
    $port = array_key_exists('port', $parts) ? $parts['port'] : 0;

    // Beware that RFC2616 (HTTP/1.1) defines header fields as case-insensitive entities.
    $request_headers = "";
    $had_content_length = false;
    foreach ($request['headers'] as $name => $value) {
        switch (strtolower($name)) {
            //omit some headers
            case "keep-alive":
            case "connection":
            case "cookie":
            //TODO: we don't handle any compressions encodings. this is only
            //a problem if client communication is already compressed (which
            //would double compress the content, once from the remote server
            //to us, and once from us to the client, but the client would
            //de-compress only once).
            case "accept-encoding":
                break;
            // the body is rebuilt from 'payload', so the client's length may
            // not match what we actually send; it is recomputed below
            case "content-length":
                $had_content_length = true;
                break;
            // correct the host parameter (only an explicitly given port is
            // appended, default ports are resolved further down)
            case "host":
                $host_info = $remote;
                if ($port) {
                    $host_info .= ':' . $port;
                }
                $request_headers .= "$name: $host_info\r\n";
                break;
            // forward all other headers
            default:
                $request_headers .= "$name: $value\r\n";
                break;
        }
    }
    if ($had_content_length || $payload !== '') {
        $request_headers .= "Content-Length: " . strlen($payload) . "\r\n";
    }

    //set fsockopen transport scheme, and the default port
    switch (strtolower($parts['scheme'])) {
        case 'https':
            $scheme = 'ssl://';
            if ( ! $port ) $port = 443;
            break;
        case 'http':
            $scheme = '';
            if ( ! $port ) $port = 80;
            break;
        default:
            //some other transports are available but not really supported
            //by this script: http://php.net/manual/en/transports.inet.php
            $scheme = $parts['scheme'] . '://';
            if ( ! $port ) {
                return myErrorHandler("Unknown scheme ($scheme) and no port.");
            }
            break;
    }

    //we make the request with socket operations since we don't want to
    //depend on the curl extension, and the higher level wrappers don't
    //give us usable error information.
    //(warnings are suppressed because the failure is reported via $errstr)
    $sock = @fsockopen("$scheme$remote", $port, $errno, $errstr, $timeout);
    if ( ! $sock ) {
        return myErrorHandler("Unable to open URL ($url): $errstr");
    }

    //timeout in fsockopen is only for the connection, the following is
    //for reading the content
    stream_set_timeout($sock, $timeout);

    //absolute url should only be specified for proxy requests
    $path_info = array_key_exists('path', $parts) ? $parts['path'] : '/';
    if (array_key_exists('query', $parts)) {
        $path_info .= '?' . $parts['query'];
    }
    //the fragment is deliberately left out: it is a client-side concept and
    //is not part of the request target sent to the server

    $out = $request["method"]." ".$path_info." ".$request["protocol"]."\r\n"
         . $request_headers
         . "Connection: Close\r\n\r\n";
    if (fwrite($sock, $out . $payload) === false) {
        fclose($sock);
        return myErrorHandler("Unable to send request to URL ($url).");
    }

    // read everything up to the blank line that terminates the header block
    $header_str = stream_get_line($sock, 1024*16, "\r\n\r\n");
    if ($header_str === false) {
        fclose($sock);
        return myErrorHandler("No response received from URL ($url).");
    }
    $headers = http_parse_headers($header_str);

    // get http status; the reason phrase after the code is optional
    if ( ! isset($headers[0]) || ! preg_match('|HTTP/\d+\.\d+\s+(\d+)|i', $headers[0], $match)) {
        fclose($sock);
        return myErrorHandler("Invalid response received from URL ($url).");
    }
    $status = $match[1];

    return array('headers' => $headers, 'socket' => $sock, 'status' => $status);
}
/**
* Parses a string containing multiple HTTP header lines into an array
* of key => values.
* Inspired by HTTP::Daemon (CPAN).
*
* The first line (the status line) is stored unparsed under the integer
* key 0; every following "Name: value" line is stored under its name
* exactly as received (names are not case-normalised). Folded
* (multi-line) values are joined with a single space. If a name occurs
* more than once, the last occurrence wins. Parsing stops at the first
* line that is neither a header nor a continuation line.
*
* @param $header_str raw header block, lines separated by CRLF or LF
* @return array the status line at index 0 followed by name => value pairs
*/
function http_parse_headers($header_str) {
    $headers = array();
    // header currently being assembled; null until the first header line
    $key = null;
    $val = '';

    //ignore leading blank lines
    $header_str = preg_replace("/^(?:\x0D?\x0A)+/", '', (string) $header_str);

    // consume the input one line at a time (CRLF or bare LF terminated)
    while (preg_match("/^([^\x0A]*?)\x0D?(?:\x0A|\$)/", $header_str, $matches)) {
        $header_str = (string) substr($header_str, strlen($matches[0]));
        $header_line = $matches[1];

        if (empty($headers)) {
            // the status line
            $headers[] = $header_line;
        }
        elseif (preg_match('/^([^:\s]+)\s*:\s*(.*)/', $header_line, $matches)) {
            if ($key !== null) {
                //previous header is finished (was potentially multi-line)
                $headers[$key] = $val;
            }
            list(, $key, $val) = $matches;
        }
        elseif (preg_match('/^\s+(.*)/', $header_line, $matches)) {
            //continue a multi-line header; a continuation line that does
            //not follow any header has nothing to extend and is ignored
            if ($key !== null) {
                $val .= " ".$matches[1];
            }
        }
        else {
            //empty (possibly malformed) header signals the end of all headers
            break;
        }
    }

    // flush the header that was still being assembled
    if ($key !== null) {
        $headers[$key] = $val;
    }
    return $headers;
}
/**
* Reports a gateway failure to the client and terminates the script.
*
* Sends a "500 Internal Error" status followed by the given message. The
* message may contain caller-supplied data (for example the requested
* URL), so it is delivered as plain text to keep a browser from
* interpreting it as HTML.
*
* @param $msg human readable description of the failure
*/
function myErrorHandler($msg)
{
    // 500 could be misleading...
    // Should we return a special Error when a proxy error occurs?
    header("HTTP/1.0 500 Internal Error");
    header('Content-Type: text/plain; charset=utf-8');
    // stop browsers from sniffing the body and rendering it as HTML anyway
    header('X-Content-Type-Options: nosniff');
    echo "Gentics Aloha Editor AJAX Gateway Error: $msg";
    exit();
}
//EOF