2016-11-30 66 views




import json 
from collections import defaultdict 
from pprint import pprint 

# Parse the JSON document and tally how many entries of its 'data' list
# point at each location id.
with open('data-science.txt') as data_file:
    data = json.load(data_file)

locations = defaultdict(int)

location_ids = (
    job['relationships']['location']['data']['id'] for job in data['data']
)
for location_id in location_ids:
    # defaultdict(int) starts every unseen id at 0.
    locations[location_id] += 1



  1: 6, 
     2: 20, 
     3: 2673, 
     4: 126, 
     5: 459, 
     6: 346, 
     8: 11, 
     9: 68, 
     10: 82, 

{
  "links": {
    "self": "http://localhost:2510/api/v2/jobs?skills=data%20science"
  },
  "data": [
    {
      "id": 121,
      "type": "job",
      "attributes": {
        "title": "Data Scientist",
        "date": "2014-01-22T15:25:00.000Z",
        "description": "Data scientists are in increasingly high demand amongst tech companies in London. Generally a combination of business acumen and technical skills are sought. Big data experience ..."
      },
      "relationships": {
        "location": {
          "links": {
            "self": "http://localhost:2510/api/v2/jobs/121/location"
          },
          "data": {
            "type": "location",
            "id": 3
          }
        },
        "country": {
          "links": {
            "self": "http://localhost:2510/api/v2/jobs/121/country"
          },
          "data": {
            "type": "country",
            "id": 1
          }
        }
      }
    }
  ]
}

这些数据由第一个 Python 脚本计算得出:其中的键是各个位置的 "id",值是分配给该位置的记录数。


"included": [
  {
    "id": 3,
    "type": "location",
    "attributes": {
      "name": "Victoria",
      "coord": [51.503378, -0.139134]
    }
  }
]


import json 
from collections import defaultdict 
from pprint import pprint 

# Parse the JSON document and print the id, name and coordinates of every
# entry in its 'included' list; a missing field is printed as None.
with open('data-science.txt') as data_file:
    data = json.load(data_file)

locations = defaultdict(int)

for entry in data['included']:
    record_id = entry.get('id')
    # 'attributes' may be absent, in which case name and coord are None.
    attributes = entry.get('attributes', {})
    print(record_id, attributes.get('name'), attributes.get('coord'))


3 Victoria [51.503378, -0.139134] 
1 United Kingdom None 
71 data science None 
32 None None 
3 Victoria [51.503378, -0.139134] 
1 United Kingdom None 
1 data mining None 
22 data analysis None 
33 sdlc None 
38 artificial intelligence None 
39 machine learning None 
40 software development None 
71 data science None 
93 devops None 
63 None None 
52 Cubitt Town [51.505199, -0.018848] 


3, Victoria, [51.503378, -0.139134], 2673 


如果某条记录没有坐标(即没有类似 [51.503378, -0.139134] 这样的值),我可以把它丢弃。


所有真实的项目文件都放在这里。




import json 
from collections import defaultdict 
from pprint import pprint 

def process_locations_data(data):
    """Count how many entries of ``data['data']`` reference each location id.

    Each entry's location id is read from
    ``entry['relationships']['location']['data']['id']``.

    Returns a ``defaultdict(int)`` mapping location id to the number of
    entries that reference it.
    """
    tally = defaultdict(int)
    location_ids = (
        job['relationships']['location']['data']['id'] for job in data['data']
    )
    for location_id in location_ids:
        tally[location_id] += 1
    return tally

def process_locations_included(data):
    """Collect ``(id, name, coord)`` for every entry of ``data['included']``.

    ``id`` is read from the entry itself; ``name`` and ``coord`` are read
    from its ``'attributes'`` mapping. Any missing field becomes ``None``.

    Returns a list of tuples, in the same order as the input entries.
    """
    rows = []
    for entry in data['included']:
        record_id = entry.get('id')
        # An entry without 'attributes' yields None for both name and coord.
        attributes = entry.get('attributes', {})
        rows.append((record_id, attributes.get('name'), attributes.get('coord')))
    return rows

# Load the JSON document once so both helpers work on the same parsed data.
with open('data-science.txt') as data_file:
    data = json.load(data_file)

# Reference count per location id, and the (id, name, coord) tuples taken
# from the 'included' list.
locations = process_locations_data(data)
records = process_locations_included(data)

# Print each included record followed by the number of 'data' entries that
# reference its id.
for record_id, name, coord in records:
    # locations is a defaultdict(int): an id that nothing references gives 0.
    references = locations[record_id]
    # print() is called as a function (Python 3), matching the other scripts
    # in this file; the former print statement is a SyntaxError on Python 3.
    print(record_id, name, coord, references)



这个脚本可以工作,但是当你试图把输出重定向到一个文件时,会得到如下错误:UnicodeEncodeError: 'ascii' codec can't encode character u'\xfc' in position 8: ordinal not in range(128) – CMorales


这与输入数据有关。这个问题是可以处理的,例如可以参考这里:http://stackoverflow.com/questions/5760936/handle-wrongly-encoded-character-in-python-unicode-string 。不过你也可以在最后一个循环中直接把结果写入文件,而不是使用 'print'。 – sal


如果这回答了原始问题,请将其标记为可接受的解决方案。 – sal