微软概念图(Microsoft Concept Graph)是公开的概念关系图,可以通过 REST 接口访问,方法如下:
import urllib
import json
import ssl
def http(x):
    """Fetch a URL and return the response body decoded as UTF-8.

    Args:
        x: The URL to request.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # A bare ``import urllib`` does not load the ``request`` submodule, so
    # import it explicitly where it is used.
    import urllib.request

    # NOTE(security): certificate verification is deliberately disabled, as in
    # the original code. The unverified context is scoped to this request
    # instead of being patched into the ``ssl`` module for the whole process.
    insecure_context = ssl._create_unverified_context()
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(x, context=insecure_context) as response:
        return response.read().decode('utf-8')
def query(x, top_k=10):
    """Look up an instance in the Concept Graph ``ScoreByProb`` endpoint.

    Args:
        x: The instance (word or phrase) to look up, as plain unescaped text;
            it is percent-encoded here before being placed in the URL.
        top_k: Value sent as the ``topK`` query parameter. Defaults to 10,
            the value that was previously hard-coded.

    Returns:
        The decoded JSON response. Callers in this file treat it as a mapping
        from concept name to a numeric score.
    """
    # A bare ``import urllib`` does not load the ``parse`` submodule.
    import urllib.parse

    url = (
        "https://concept.research.microsoft.com/api/Concept/ScoreByProb"
        "?instance={}&topK={}"
    ).format(urllib.parse.quote(x), top_k)
    return json.loads(http(url))
# Example lookup: fetch the concepts associated with the instance "microsoft".
query('microsoft')
如果要获取最新的新闻标题(title),可以使用 NewsAPI.org 的数据,调用其 API:
# Personal NewsAPI.org key; replace the placeholder before running.
newsapi_key = '<your API key here>'


def get_news(country='us'):
    """Return the top-headline articles NewsAPI.org reports for a country.

    Args:
        country: Country code sent as the ``country`` query parameter
            (for example ``'us'`` or ``'gb'``).

    Returns:
        The value of the ``articles`` field of the JSON response. Callers in
        this file read a ``'title'`` key from each element.

    Raises:
        KeyError: If the response has no ``articles`` field.
    """
    # A bare ``import urllib`` does not load the ``parse`` submodule.
    import urllib.parse

    # urlencode escapes the values so an unusual key or country code cannot
    # corrupt the query string.
    params = urllib.parse.urlencode({'country': country, 'apiKey': newsapi_key})
    res = json.loads(http("https://newsapi.org/v2/top-headlines?" + params))
    return res['articles']
# Headline text of every top story, US feed first and then the UK feed.
all_titles = [
    article['title']
    for country in ('us', 'gb')
    for article in get_news(country)
]
用 TextBlob 库提取标题(title)中的名词短语:
import sys!{sys.executable} -m pip install textblob
!{sys.executable} -m textblob.download_corpora
from textblob import TextBlob
# Group the headlines by the noun phrases TextBlob finds in them:
# w maps noun phrase -> list of headlines containing that phrase.
w = {}
for title in all_titles:
    for phrase in TextBlob(title).noun_phrases:
        w.setdefault(phrase, []).append(title)

# Notebook cell result: number of headlines per noun phrase.
{phrase: len(titles) for phrase, titles in w.items()}
如果要把标题中的名词短语归纳为更一般的概念(避免过于专业的词汇),需要对每个名词短语各调用一次 REST 接口:
# Group the headlines by general concept instead of by raw noun phrase:
# w maps concept -> list of headlines whose noun phrases map to that concept.
w = {}
for title in all_titles:
    for noun in TextBlob(title).noun_phrases:
        # Pass the phrase as plain text. query() percent-encodes its argument,
        # so pre-escaping spaces as "%20" would be encoded a second time
        # ("%2520") and the service would look up a literal "%20".
        terms = query(noun)
        for term, score in terms.items():
            # Keep only concepts with a score above 0.1.
            if score > 0.1:
                w.setdefault(term, []).append(title)
调用:
# Notebook cell result: concepts that appear in more than three headlines,
# with the number of headlines for each.
{term: len(titles) for term, titles in w.items() if len(titles) > 3}
输出:
{'city': 9,
'brand': 4,
'place': 9,
'town': 4,
'factor': 4,
'film': 4,
'nation': 11,
'state': 5,
'person': 4,
'organization': 5,
'publication': 10,
'market': 5,
'economy': 4,
'company': 6,
'newspaper': 6,
'relationship': 6}
打印各概念对应的具体标题:
# Print the headlines grouped under a few selected concepts, each under an
# upper-case heading (ECONOMY, NATION, PERSON).
for concept in ('economy', 'nation', 'person'):
    print('\n' + concept.upper() + ':\n' + '\n'.join(w[concept]))