2010-11-05 59 views
0

我需要每天从我的本地服务器下载http://www.carriersoftwaredata.com/Login.aspx?file=FHWA的CSV文件。我的计划是执行一个运行php脚本的cron作业来执行此操作。但是,在允许下载文件之前,该页面需要输入用户名和密码。如果我在我的Web浏览器中访问此页面,请输入用户名和密码,然后提交表单,然后我的浏览器会显示一个Download对话框并开始下载该文件。如何使用PHP提交表单并从其他网站检索文件?

如何使用PHP提交表单并下载提供的文件?

这是我目前用来获取所需 $_POST 信息的做法。

//url for fhwa db file
$fhwa_url = 'http://www.carriersoftwaredata.com/Login.aspx?file=FHWA';

//post values collected from the login form - initialised up front so that
//code running after this block can always rely on $fhwa_post being an array
$fhwa_post = array();

//get the contents of the fhwa page (file_get_contents returns false on failure)
$fhwa_login = file_get_contents($fhwa_url);

//load contents of fhwa page into a DOMDocument object
$fhwa_dom = new DOMDocument;

//collect libxml parse errors internally instead of printing a PHP warning
//for every piece of markup the parser does not like
$previous_libxml_setting = libxml_use_internal_errors(true);
$fhwa_loaded = is_string($fhwa_login)
    && $fhwa_login !== ''
    && $fhwa_dom->loadHTML($fhwa_login);
libxml_clear_errors();
libxml_use_internal_errors($previous_libxml_setting);

if (!$fhwa_loaded)
{
    echo 'Could not Load html for FHWA Login page.';
}
else
{
    //names of the inputs that must be sent back to the server - only the
    //keys are used, the values are filled in from the page below
    $fhwa_post_items = array(
        '__VIEWSTATE' => '',
        'Email' => '',
        'Password' => '',
        '__EVENTVALIDATION' => '',
    );

    //create an xpath object
    $xpath = new DOMXpath($fhwa_dom);

    //iterate through the form1 form and find all inputs
    foreach ($xpath->query('//form[@name="form1"]//input') as $input)
    {
        $input_name = $input->getAttribute('name');

        //skip inputs that are not part of the post array
        if (!array_key_exists($input_name, $fhwa_post_items))
        {
            continue;
        }

        //Email and Password get the defined credentials ($email and $pass are
        //expected to be defined before this block runs); every other field
        //is sent back with the value found in the page
        if ($input_name === 'Email')
        {
            $fhwa_post[$input_name] = $email;
        }
        elseif ($input_name === 'Password')
        {
            $fhwa_post[$input_name] = $pass;
        }
        else
        {
            $fhwa_post[$input_name] = $input->getAttribute('value');
        }
    }// foreach
}// if

这就是我如何提交表单 - 但它似乎并没有以我需要的方式工作。我希望stream_get_contents返回的内容是我想要下载的CSV文件的内容。

//get the url data and build the urlencoded request body
$url_data = parse_url($fhwa_url);
$post_str = http_build_query($fhwa_post);

//create socket - fsockopen returns false on failure, so check the result
//before writing to it (the warning is suppressed because the failure is
//reported below using $errstr / $errno)
$fp = @fsockopen($url_data['host'], 80, $errno, $errstr, 30);
if ($fp === false)
{
    echo "Could not connect to {$url_data['host']}: $errstr ($errno)";
}
else
{
    //send the request headers
    fputs($fp, "POST $fhwa_url HTTP/1.0\r\n");
    fputs($fp, "Host: {$url_data['host']}\r\n");
    fputs($fp, "User-Agent: Mozilla/4.5 [en]\r\n");
    fputs($fp, "Content-Type: application/x-www-form-urlencoded\r\n");
    fputs($fp, "Content-Length: ".strlen($post_str)."\r\n");
    fputs($fp, "\r\n");

    //send the body - exactly Content-Length bytes, nothing appended after it
    fputs($fp, $post_str);

    //read the complete raw response (status line + headers + body)
    $response = (string) stream_get_contents($fp);
    fclose($fp);

    //the headers end at the first blank line; only the part after it is the
    //content that was requested
    $header_end = strpos($response, "\r\n\r\n");
    echo ($header_end === false) ? $response : substr($response, $header_end + 4);
}

任何帮助是绝对赞赏。

回答

0

我发现我原来的做法其实是可行的,只是发送的 POST 信息不够完整。我写了一个函数,方便以后轻松地完成同样的事情。

/**
* @author: jeremysawesome - www.dnawebagency.com
* @description: save_remote_file - Sends the post_array as an
* application/x-www-form-urlencoded POST request to post_url over a plain
* socket and saves the body of the response to the file named by filename.
* The HTTP status line and response headers are NOT written to the file.
* The script is terminated with die() when the url has no host, the
* connection cannot be opened, the response cannot be read or the file
* cannot be written.
* Note: This function should only be used when specific post information
*  must be submitted to the remote server before the file download
*  will begin.
*  For Example: http://www.carriersoftwaredata.com/Login.aspx?file=FHWA
* Note: The connection is an unencrypted socket, so only http:// urls are
*  supported. The port is taken from the url and defaults to 80.
* @param: post_url - the url to send a post request to
* @param: post_array - the post arguments to send to the remote server.
* @param: filename - the name to save the retrieved remote file as
**/
function save_remote_file($post_url, $post_array, $filename)
{
    //get the url data
    $url_data = parse_url($post_url);
    if (!is_array($url_data) || empty($url_data['host']))
    {
        die('Cannot determine host for url '. $post_url);
    }
    $host = $url_data['host'];
    $port = isset($url_data['port']) ? (int) $url_data['port'] : 80;

    //a non-default port has to be repeated in the Host header
    $host_header = ($port === 80) ? $host : $host.':'.$port;

    $post_str = http_build_query($post_array);

    //build the request: headers, blank line, then the body. The body is
    //exactly Content-Length bytes long - nothing is appended after it.
    $request = "POST $post_url HTTP/1.0\r\n";
    $request .= "Host: $host_header\r\n";
    $request .= "User-Agent: Mozilla/4.5 [en]\r\n";
    $request .= "Content-Type: application/x-www-form-urlencoded\r\n";
    $request .= "Content-Length: ".strlen($post_str)."\r\n";
    $request .= "\r\n";
    $request .= $post_str;

    //create socket - the warning is suppressed because a failed connection
    //is reported right below using $errstr / $errno
    $fp = @fsockopen($host, $port, $errno, $errstr, 30);
    if ($fp === false)
    {
        die("Cannot connect to $host_header: $errstr ($errno)");
    }

    //send the request and download the raw response
    fputs($fp, $request);
    $response = stream_get_contents($fp);
    fclose($fp);

    if ($response === false)
    {
        die('Cannot read response from '. $post_url);
    }

    //the raw response is "status line + headers, blank line, body" - only
    //the body is the remote file, so drop everything up to the blank line
    $header_end = strpos($response, "\r\n\r\n");
    $remote_file = ($header_end === false) ? $response : substr($response, $header_end + 4);

    //save data
    $saved_file = fopen($filename,'w') or die('Cannot Open File '. $filename);
    fwrite($saved_file, $remote_file);
    fclose($saved_file);
}// save_remote_file

对于感兴趣的人,或者今后需要做类似事情的人,这里是完整的代码(不包含上面的函数)。


<?php
/**
* @author: jeremysawesome - www.dnawebagency.com
* @description: This file retrieves the database information from Carrier Software.
* This should be run with a cron in order to download the files. The file works
* by getting the contents of the Carrier Software login page for the database. An
* array of post values is created based on the contents of the login page and the
* defined username and password. A socket is opened and the post information is
* passed to the login page.
**/

//define username and pass
$user_info = array(
    'user'=>'[USERNAME]',
    'pass'=>'[PASSWORD]'
);

//url for fhwa db file
$fhwa_url = 'http://www.carriersoftwaredata.com/Login.aspx?file=FHWA';

//get the contents of the fhwa page (file_get_contents returns false on failure)
$fhwa_login = file_get_contents($fhwa_url);

//load contents of fhwa page into a DOMDocument object
$fhwa_dom = new DOMDocument;

//collect libxml parse errors internally instead of printing a PHP warning
//for every piece of markup the parser does not like
$previous_libxml_setting = libxml_use_internal_errors(true);
$fhwa_loaded = is_string($fhwa_login)
    && $fhwa_login !== ''
    && $fhwa_dom->loadHTML($fhwa_login);
libxml_clear_errors();
libxml_use_internal_errors($previous_libxml_setting);

if (!$fhwa_loaded)
{
    die('Could not Load html for FHWA Login page.');
}

//names of the inputs to send back to the server - only the keys are used;
//the values (including the randomly generated values of hidden inputs) are
//read from the login page below
$fhwa_post_items = array(
    '__EVENTTARGET' => '',
    '__EVENTARGUMENT' => '',
    '__VIEWSTATE' => '',
    'Email' => '',
    'Password' => '',
    'btnSubmit' => 'Submit',
    '__EVENTVALIDATION' => '',
);

//post values collected from the form - initialised so it is always an array
$fhwa_post = array();

//create an xpath object
$xpath = new DOMXpath($fhwa_dom);

//iterate through the form1 form and find all inputs
foreach ($xpath->query('//form[@name="form1"]//input') as $input)
{
    $input_name = $input->getAttribute('name');

    //skip inputs that are not part of the post array
    if (!array_key_exists($input_name, $fhwa_post_items))
    {
        continue;
    }

    //Email and Password get the defined credentials; every other field is
    //sent back with the value found in the page
    if ($input_name === 'Email')
    {
        $fhwa_post[$input_name] = $user_info['user'];
    }
    elseif ($input_name === 'Password')
    {
        $fhwa_post[$input_name] = $user_info['pass'];
    }
    else
    {
        $fhwa_post[$input_name] = $input->getAttribute('value');
    }
}// foreach

//without any form fields there is nothing meaningful to submit
if (count($fhwa_post) === 0)
{
    die('Could not find the login form fields on the FHWA Login page.');
}

//save the file - function shown above
save_remote_file($fhwa_url, $fhwa_post, "my_data_folder/my_fhwa-db.zip");
1

看起来你需要使用 cURL 库:http://www.php.net/manual/en/book.curl.php

下面是该网站上使用 cURL 发送 POST 数据的示例(要提交的表单数据放在数组 $data 中):

<?php

/* http://localhost/upload.php:
print_r($_POST);
print_r($_FILES);
*/

$ch = curl_init();

// Form fields to submit. The file is attached with a CURLFile object:
// current PHP versions no longer treat a value starting with '@' as a file
// to upload, so '@/home/user/test.png' would be sent as a literal string.
$data = array('name' => 'Foo', 'file' => new CURLFile('/home/user/test.png'));

curl_setopt($ch, CURLOPT_URL, 'http://localhost/upload.php');
curl_setopt($ch, CURLOPT_POST, 1);
// Passing an array makes cURL send the fields as multipart/form-data
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);

// curl_exec returns false when the transfer fails
if (curl_exec($ch) === false) {
    echo 'cURL error: ' . curl_error($ch);
}
?>
+0

嗨,Chris - 感谢您的帮助。我找到了一个无需使用 cURL 的不同解决方案,但如果以后需要做类似的事情,我一定会参考这个方法。感谢您向我展示如何通过 cURL 发送 POST 字段。 – jeremysawesome 2010-11-09 17:57:29

1

如果是我,我会使用 cURL 来完成这件事。如果服务器使用的是 HTTP 基本身份验证,您可以将认证信息随请求一起发送。之后,您应该能够从 cURL 请求中获取数据,并使用 PHP 的 fwrite 函数将其保存到本地机器。

这是我用过的一个例子。我去掉了自己那些花哨的逻辑,只保留了最基本的部分。它也不包含基本身份验证的代码,但您应该能够很容易地在 Google 上找到。我认为这种方式可能更简单,除非您还必须做其他的事情。

// Download data
$ch = curl_init('http://www.server.com/file.txt');
curl_setopt($ch, CURLOPT_HEADER, 0);            // keep response headers out of the output
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);    // return the body instead of printing it
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); // follow redirects to the final file
// CURLOPT_BINARYTRANSFER is intentionally not set: it has no effect in
// current PHP versions and is deprecated as of PHP 8.4.
$rawdata = curl_exec($ch);
$curl_error = curl_error($ch); // read the error before the handle is closed
curl_close ($ch);

// curl_exec returns false on failure - do not overwrite the file in that case
if ($rawdata === false) {
    die('Download failed: ' . $curl_error);
}

// Save data
$fp = fopen('new/file/location.txt','w');
if ($fp === false) {
    die('Cannot open new/file/location.txt for writing');
}
fwrite($fp, $rawdata);
fclose($fp);
+0

我通读了你的代码部分,听起来好像不是为服务器请求做的。我不知道你是否可以使用curl存储会话,但是你可以尝试先请求登录,然后尝试向csv发出请求? – gokujou 2010-11-06 03:06:49

+0

非常感谢您的帮助@gokujou。我已经能够得到这个工作。在这个例子中,我没有使用cURL,但是如果我需要再次这样做,我肯定会考虑它。 +1,我在我的函数中使用了保存数据功能。 – jeremysawesome 2010-11-09 17:55:17