BTW……该网站只能在 Internet Explorer 中正常工作……使用 Python 登录网站
我正在尝试为一位客户抓取某个网站,以便替他们自动执行任务。基本上,程序会抓取不同的报告,查找周转时间,并通过电子邮件发送给客户。我的抓取程序工作正常,我遇到的问题是使用 Mechanize 登录该网站,因为登录表单使用了 AJAX。我四处寻找过解决方案,但似乎找不到我需要的答案。
下面是 HTML 表单以及(据我所知)处理它的 AJAX 代码。
/**
 * Send the contents of the login form (`Form1`) to the server as an AJAX POST.
 *
 * The request targets `login.aspx` with the `isAjax=true` and
 * `eventTarget=TranLogin` query parameters. `TransLoginFinished` runs on
 * completion; `reportError` / `reportException` handle failures.
 */
function TranLogin() {
    var endpoint = 'login.aspx?isAjax=true&eventTarget=TranLogin';
    var formBody = Form.serialize('Form1');

    // `ajaxRequest` is not declared with `var` here, so the request object is
    // stored in a variable that lives outside this function.
    ajaxRequest = new Ajax.Request(
        endpoint,
        {
            method : 'post',
            postBody : formBody,
            onComplete : TransLoginFinished,
            onFailure : reportError,
            onException : reportException
        }
    );
}
/**
 * Completion callback for the login AJAX request.
 *
 * Reads the XML reply and, depending on the LoginResult element:
 *   '-1'  shows FailMsg in an alert and re-enables Form1
 *   '20'  redirects to initpasswd.aspx
 *   '14'  redirects to chgpasswd.aspx?type=chgpwd
 *   '16'  redirects to chgpasswd.aspx?type=pwdexpire
 *   '0'   optionally offers a password change, then selects a hosp code
 *         (directly, or by showing a clickable list built from HospList)
 *
 * Most variables below are assigned without `var`, so they are not local to
 * this function.
 *
 * NOTE(review): `.text` on XML nodes and `innerText` look like the
 * Internet Explorer DOM flavour - confirm before running in other browsers.
 *
 * @param serverResponse response object; only its `responseXML` is read.
 */
function TransLoginFinished(serverResponse) {
    // `requestFailed` is defined elsewhere; presumably set by the failure
    // handlers - confirm.
    if (requestFailed) return;
    xmlNodes = serverResponse.responseXML;
    usrSite = "8000";
    usrCode = decodeXmlChar(xmlNodes.getElementsByTagName('UserCode')[0].text);
    // Login rejected: show the server's message and unlock the form again.
    if (xmlNodes.getElementsByTagName('LoginResult')[0].text == '-1'){
        alert(decodeXmlChar(xmlNodes.getElementsByTagName('FailMsg')[0].text));
        Form.enable('Form1');
        return;
    }
    // Result '20': go to the initial-password page.
    if (xmlNodes.getElementsByTagName('LoginResult')[0].text == '20'){
        window.location.replace('initpasswd.aspx?usersite=' + usrSite + '&usercode=' + usrCode);
        return;
    }
    // Result '14': go to the change-password page (type=chgpwd).
    if (xmlNodes.getElementsByTagName('LoginResult')[0].text == '14'){
        window.location.replace('chgpasswd.aspx?type=chgpwd&usersite=' + usrSite + '&usercode=' + usrCode);
        return;
    }
    // Result '16': go to the change-password page (type=pwdexpire).
    if (xmlNodes.getElementsByTagName('LoginResult')[0].text == '16'){
        window.location.replace('chgpasswd.aspx?type=pwdexpire&usersite=' + usrSite + '&usercode=' + usrCode);
        return;
    }
    // Result '0': continue with the login flow.
    if (xmlNodes.getElementsByTagName('LoginResult')[0].text == '0'){
        // Expiry warning: ask whether to change the password right now.
        if (xmlNodes.getElementsByTagName('PwdExpireWarning')[0].text == 'true'){
            var changePwdNow = window.confirm(decodeXmlChar(xmlNodes.getElementsByTagName('PwdExpireMsg')[0].text));
            if (changePwdNow == true){
                window.location.replace('chgpasswd.aspx?type=chgpwd&usersite=' + usrSite + '&usercode=' + usrCode);
                return;
            }
            // Disabled variant of the prompt above that used a modal dialog:
            // var arg = { promptMsg :decodeXmlChar(xmlNodes.getElementsByTagName('PwdExpireMsg')[0].text),
            // buttons : [ { value : "Yes", rtnVal : 1 },
            // { value : "No", rtnVal : 0 }
            // ]
            // };
            // var rtn = window.showModalDialog('../Modules/ModalMessageBox.aspx',arg, "dialogHeight:140px;dialogWidth:500px; center:1;status:no;");
            // if (rtn && rtn == 1){
            // window.location.replace('chgpasswd.aspx?type=chgpwd&usersite=' + usrSite + '&usercode=' + usrCode);
            // return;
            // }
        }
        // No hosp code typed in: take it from the '|'-separated HospList.
        if (JTrim($('txtHospCode').value) == '') {
            hospList = decodeXmlChar(xmlNodes.getElementsByTagName('HospList')[0].text).split('|');
            // Fewer than two entries: use the first one; the code is the part before '-'.
            if (hospList.length < 2) {
                selectedHospCode = hospList[0].split('-')[0];
                TranSelectHosp(selectedHospCode);
                return;
            }
            // Otherwise reveal the list and add one clickable link per non-empty entry.
            $('divHospList').style.display = 'block';
            for(i=0;i<hospList.length;i++)
            {
                if (hospList[i] != '')
                {
                    divHospCode = document.createElement("div");
                    divHospCode.className='divHospCode';
                    $('divHospListBG').appendChild(divHospCode);
                    lnkHospCode = document.createElement("a");
                    // Entries longer than 33 characters are shown as the first 30 plus '...';
                    // the full text is kept in the title.
                    if (hospList[i].length <= 33)
                        lnkHospCode.innerText = hospList[i];
                    else
                        lnkHospCode.innerText = hospList[i].substr(0,30) + '...';
                    lnkHospCode.title = hospList[i];
                    lnkHospCode.className = 'lnkHospCode';
                    divHospCode.appendChild(lnkHospCode);
                    lnkHospCode.onmouseover = function(){this.style.color = '#000000';}
                    lnkHospCode.onmouseout = function(){this.style.color = '#6c6c6c';}
                    // Clicking a link selects the code in front of the first '-' of its visible text.
                    lnkHospCode.onclick = function(){TranSelectHosp(this.innerText.split('-')[0]);}
                    // Once the index passes 7, fix the container height and make it scroll (done once).
                    if (i > 7 && $('divHospListBG').style.overflow != 'auto')
                    {
                        $('divHospListBG').style.height = '198px';
                        $('divHospListBG').style.overflow = 'auto';
                    }
                }
            }
            return;
        }
        else
        {
            // A hosp code was typed in: use it after trimming.
            TranSelectHosp(JTrim($('txtHospCode').value));
        }
    }
}
<form name="Form1" method="post" action="login.aspx" id="Form1">
<div class="divLeft">
<span>Input Account Code:</span>
</div>
<div class="divRight">
<input name="txtHospCode" type="text" id="txtHospCode" class="inputClass" maxlength="4" />
</div>
<div class="divLeft">
<span>Input User Code:</span>
</div>
<div class="divRight">
<input name="txtUserCode" type="text" id="txtUserCode" class="inputClass" maxlength="6" />
</div>
<div class="divLeft">
<span>Input Password:</span></div>
<div class="divRight">
<input name="txtPassword" type="password" id="txtPassword" class="inputClass" />
</div>
<div class="divLeft">
<span>Login As:</span>
</div>
<div class="divRight">
<input type="radio" name="rdLoginType" value="D" checked="checked" />Doctor
<input type="radio" name="rdLoginType" value="T" />Other
</div>
<div class="divLeft">
</div>
<div class="divRight">
<input class="buttonClass" id="btnOK" type="button" value="Enter" onclick="LoginIn();" />
<input class="buttonClass" id="btnReset" type="button" value="Reset" onclick="ResetInput();" />
</div>
到目前为止我的代码
import mechanize
import cookielib
from BeautifulSoup import BeautifulSoup
import html2text
import re
# Instantiate Browser
br = mechanize.Browser()

# Cookie Jar
cj = cookielib.LWPCookieJar()
br.set_cookiejar(cj)

# Browser options
br.set_handle_equiv(True)
br.set_handle_gzip(True)
br.set_handle_redirect(True)
br.set_handle_referer(True)
br.set_handle_robots(False)

# Follows refresh 0 but does not hang on refresh > 0
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

# User-Agent: identify as Internet Explorer 7
br.addheaders = [('User-agent', 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 6.0)')]
def login_to_website(login_url, login_form_name, usr_form_name, pwd_form_name,
                     acct_code_name, usr, pwd, acct_code):
    """Log the user into the website through its login form.

    Opens ``login_url`` with the module-level browser ``br``, fills in the
    account code, user code and password, selects the "Other" login type,
    submits the form and prints whether the browser ended up on a different
    URL than the login page.

    Args:
        login_url: URL of the login page.
        login_form_name: ``name`` attribute of the login form.
        usr_form_name: name of the user-code input.
        pwd_form_name: name of the password input.
        acct_code_name: name of the account-code input.
        usr: user code to log in with.
        pwd: password to log in with.
        acct_code: account code to log in with.

    Returns:
        None. Progress and the outcome are printed.
    """
    # Open the url of the login page
    br.open(login_url)

    # Select the login form by name
    br.select_form(login_form_name)

    # Enter user's credentials into the form
    br.form[acct_code_name] = acct_code
    br.form[usr_form_name] = usr
    br.form[pwd_form_name] = pwd
    # 'T' is the value of the "Other" option of the rdLoginType radio group
    br.find_control(name='rdLoginType').value = ['T']

    # Submit the form
    # NOTE(review): the page's own script posts the serialized form to
    # 'login.aspx?isAjax=true&eventTarget=TranLogin' instead of doing a normal
    # form submit, so a plain submit may not reach the same server-side login
    # path - confirm against the site.
    print("Logging in as: %s" % (usr,))
    br.submit()

    # Print current url
    current_url = br.geturl()
    print("We are now at: %s" % (current_url,))

    # Still on the login URL is taken to mean the login did not go through
    if current_url == login_url:
        print("Login Failed")
    else:
        print("Successfully logged in")
# Log in using the field names of the site's login form, which is named
# "Form1". USR, PWD and acctCode must be defined before this call.
login_to_website(
    'https://www.website.com',
    'Form1',
    'txtUserCode',
    'txtPassword',
    'txtHospCode',
    USR,
    PWD,
    acctCode,
)
您可以使用能够控制真实浏览器的库来抓取该网站吗?抓取正是我主要想做的事情,只是我在登录时遇到了麻烦。 – ChrisC 2011-01-06 00:39:46
我尝试过在禁用 JavaScript 的情况下使用该网站,但无济于事。 – ChrisC 2011-01-06 00:53:26
@ChrisC:我认为应该可以通过控制浏览器来抓取。以下是 Selenium 的文档:http://code.google.com/p/selenium/wiki/PythonBindings 。如果这还不足以获取所需的数据,请寻找获取 HTML 的方法,然后将其交给 BeautifulSoup 处理。 – 2011-01-06 11:08:02