Hatena::Grouppython

zorioの日記

2010-04-01

webページをevernoteにクリップするスクリプト

22:53

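順に、

1. 対象のwebページを取得してbody部分を取り出す
2. evernoteのログインページを取得し、hiddenフィールドと共にユーザ名・パスワードをPOSTしてログインする
3. clip.actionにページ内容をPOSTして確認フォームを取得する
4. 確認フォームの内容を再度clip.actionにPOSTして保存する

という操作を行う。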

htmlの解析はBeautifulSoupを使用。

http://www.crummy.com/software/BeautifulSoup/

# -*- encoding: utf-8 -*-

import urllib, urllib2, cookielib, re, logging
from BeautifulSoup import BeautifulSoup

# Login form endpoint: clip() first GETs it to read the hidden form fields,
# then POSTs the credentials back to the same URL.
LOGIN_URL = "https://www.evernote.com/Login.action"
# Clip endpoint: clip() POSTs to it twice (initial clip, then the save form).
POST_URL = "http://www.evernote.com/clip.action"
# Account credentials sent as the 'username' / 'password' login form fields.
# NOTE(review): placeholders here; avoid committing real credentials in source.
USERNAME = 'xxxxxxxxxx'
PASSWORD = 'yyyyyyyyyyyyyy'
# Sent as the 'notebookGuid' form field when saving the clip.
NOTEBOOK_GUID = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'

def _hidden_value(soup, name):
    """Return the ``value`` attribute of the ``<input name=...>`` in *soup*.

    Raises:
        ValueError: if no such input element exists in the parsed page.
    """
    field = soup.find('input', {'name': name})
    if field is None:
        # Without this guard the failure would be an opaque TypeError from
        # subscripting None.
        raise ValueError('Form field %r not found' % name)
    return field['value']


def clip(url, comment, title):
    """Clip the web page at *url* into Evernote through the web clip form.

    Steps, in order:
      1. Fetch *url* and extract its ``<body>`` markup.
      2. GET the login page, then POST the credentials together with the
         page's hidden ``_sourcePage`` / ``__fp`` fields.
      3. POST the page to the clip endpoint to obtain the save form.
      4. POST the save form (title, comment, notebook, body) back.

    Args:
        url: address of the page to clip.
        comment: comment stored with the clip (str or unicode).
        title: title of the note (str or unicode).

    Returns:
        None.

    Raises:
        ValueError: if the response to the first clip POST has no
            ``notebookGuid`` select (treated as "not logged in"), or if a
            required hidden form field is missing.
    """
    logging.debug(url)
    logging.debug(title)
    logging.debug(comment)

    tags = ''

    # Fetch the target page; only the <body> element is clipped.
    page = BeautifulSoup(urllib2.urlopen(url).read())
    body = ''
    if page.body:
        body = unicode(page.body).encode('utf-8')

    # urllib.urlencode needs byte strings for non-ASCII text.
    if isinstance(title, unicode):
        title = title.encode('utf-8')

    if isinstance(comment, unicode):
        comment = comment.encode('utf-8')

    # One opener with a cookie jar so the login session carries over to
    # the later clip requests.
    cj = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

    # The login form's hidden fields must be echoed back with the
    # credentials.
    login_page = BeautifulSoup(opener.open(LOGIN_URL).read())
    opener.open(LOGIN_URL, urllib.urlencode({
        '_sourcePage': _hidden_value(login_page, '_sourcePage'),
        '__fp': _hidden_value(login_page, '__fp'),
        'username': USERNAME,
        'password': PASSWORD,
        'login': 'Sign in'
    }))

    # First POST: submit the page and receive the save form.
    param = urllib.urlencode({
        'url': url,
        'body': body,
        'title': title,
        'format': 'microclip',
        'quicknote': 'true'
    })
    logging.debug(param)
    form_page = BeautifulSoup(opener.open(POST_URL, param).read())

    # find() returns None when nothing matches, so this check can actually
    # fire; the previous findAll() result was a list and never equal to
    # None. Matching on the name alone avoids depending on a 'value'
    # attribute of <select>.
    # NOTE(review): assumes the save form contains this select only when
    # logged in - confirm against the live page.
    if form_page.find('select', {'name': 'notebookGuid'}) is None:
        raise ValueError('Not logged in')

    # Second POST: send the save form back, including its hidden fields.
    param = urllib.urlencode({
        'title': title,
        'tags': tags,
        'notebookGuid': NOTEBOOK_GUID,
        'comment': comment,
        'url': url,
        'fullPage': 'true',
        'saveQuicknote': 'save',
        'format': 'microclip',
        'body': body,
        '_sourcePage': _hidden_value(form_page, '_sourcePage'),
        '__fp': _hidden_value(form_page, '__fp')
    })
    logging.debug(param)
    # NOTE: the author reports that this final request ends in a 404 even
    # though the note is saved; that behavior is left unchanged here.
    r = opener.open(POST_URL, param)
    r.read()

最後に404が出てコケるんだが、登録は出来ている。

で、これをgoogle appengineで動かそうとすると、最後のPOSTでタイムアウトするみたいでうまく動かない。

詳細を追いかけるのは断念。

BayleBayle2011/07/27 05:51Superbly illuminating data here, tahnks!

jnzegblyrdujnzegblyrdu2011/07/27 21:41weNSb5 <a href="http://krbkzqmkljfc.com/">krbkzqmkljfc</a>

qfqpooagqfqpooag2011/07/28 22:53C3viZf , [url=http://zhafeamtgtok.com/]zhafeamtgtok[/url], [link=http://gmfcupcpvadr.com/]gmfcupcpvadr[/link], http://vkqpcbiaklwc.com/

hmoxqkumvhmoxqkumv2011/07/29 21:17vYvyuZ <a href="http://lzadcelykvcn.com/">lzadcelykvcn</a>

bugphznqbugphznq2011/07/31 00:46k26xuw , [url=http://oygclwgmvvgn.com/]oygclwgmvvgn[/url], [link=http://fqddlcwejkss.com/]fqddlcwejkss[/link], http://tlqpxytycmyd.com/

nn2012/01/30 04:50GAEの場合HTTPSによるGETは許可されてもPOSTは許可されてないらしいです。

CynthiaCynthia2012/12/09 20:26I don't know who you wrote this for but you helped a brtoher out.

ybsoveyhybsoveyh2012/12/10 18:21ciMySw <a href="http://wewmwxmicxme.com/">wewmwxmicxme</a>

vcklzyvcklzy2012/12/12 03:31FXVGv9 , [url=http://ikcgtjtdajbr.com/]ikcgtjtdajbr[/url], [link=http://qjppgbfwiksg.com/]qjppgbfwiksg[/link], http://cdxfhdfmugku.com/

frtfmgijjsfrtfmgijjs2014/02/07 19:26rxlscqzuipo, <a href="http://www.kbgfjbmoix.com/">ifgwsikrgy</a>

ljyynztkhsljyynztkhs2014/02/10 06:56cygvpqzuipo, http://www.ouzgqdnjwc.com/ dlnxtovlgh

dxszpphmindxszpphmin2014/02/12 19:57sowtlqzuipo, <a href="http://www.xnblubwvke.com/">njezxztfvd</a> , [url=http://www.udvxiienhr.com/]blaxbobmnb[/url], http://www.vqjngciqal.com/ njezxztfvd