2017-07-20 65 views
1

我写了这个脚本,用于从 Instagram 获取“网红”(influencer)的粉丝数量。这是进行并行编程的较好方式吗?

程序打印出的“runtime”(运行耗时)在 550-750ms 之间。这个结果不算差,但我想知道还能不能做得更好(我是 Go 新手,才学了 3 周)。

package main 

import (
    "encoding/json" 
    "fmt" 
    "io/ioutil" 
    "log" 
    "net/http" 
    "sync" 
    "time" 
) 

// user is the top-level object decoded from the JSON response body in
// getFollowerCount; only the "user" key is read.
type user struct {
    User userData `json:"user"`
}

// userData holds the part of the "user" object this program reads: the
// "followed_by" entry.
type userData struct {
    Followers count `json:"followed_by"`
}

// count wraps the integer stored under the "count" key.
type count struct {
    Count int `json:"count"`
}

// getFollowerCount starts one worker goroutine that reads usernames from in,
// looks up the follower count of each and sends it on the returned channel.
// The returned channel is closed once in has been drained.
//
// A lookup that fails (request error, non-200 status, unreadable or
// undecodable body) is reported on stdout and skipped; no value is sent for
// it, so the worker never dereferences a nil response and never reports a
// bogus zero count.
func getFollowerCount(in <-chan string) <-chan int {
    out := make(chan int)
    go func() {
        defer close(out)
        for un := range in {
            n, err := fetchFollowerCount(un)
            if err != nil {
                fmt.Println(err)
                continue
            }
            out <- n
        }
    }()
    return out
}

// fetchFollowerCount performs a single lookup for un. It lives in its own
// function so the deferred Body.Close runs after every request instead of
// piling up until the worker goroutine exits.
func fetchFollowerCount(un string) (int, error) {
    URL := "https://www.instagram.com/" + un + "/?__a=1"
    resp, err := http.Get(URL)
    if err != nil {
        // resp is nil here; it must not be touched.
        return 0, err
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return 0, fmt.Errorf("fetching %q: unexpected status %s", un, resp.Status)
    }

    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        return 0, err
    }

    var u user
    if err := json.Unmarshal(body, &u); err != nil {
        return 0, err
    }
    return u.User.Followers.Count, nil
}

// merge fans the given channels into a single output channel. One forwarding
// goroutine is started per input; the output is closed after every input has
// been closed and drained. Values from different inputs arrive in no
// particular order.
func merge(cs ...<-chan int) <-chan int {
    merged := make(chan int)
    var forwarders sync.WaitGroup

    for _, src := range cs {
        forwarders.Add(1)
        go func(src <-chan int) {
            defer forwarders.Done()
            for v := range src {
                merged <- v
            }
        }(src)
    }

    // Close the output only once all forwarders are finished.
    go func() {
        forwarders.Wait()
        close(merged)
    }()

    return merged
}

// gen returns a channel that emits the given usernames in order and is then
// closed. The sending happens in a separate goroutine, so gen itself returns
// immediately.
func gen(users ...string) <-chan string {
    names := make(chan string)
    go func() {
        defer close(names)
        for i := 0; i < len(users); i++ {
            names <- users[i]
        }
    }()
    return names
}

// main feeds a fixed list of usernames to ten getFollowerCount workers that
// share one input channel, prints every count as it arrives from the merged
// output, and finally logs the elapsed time.
func main() {
    const workerCount = 10

    start := time.Now()
    fmt.Println("STARTING UP")

    usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
    in := gen(usrs...)

    // All workers read from the same channel, so each username is handled
    // by exactly one of them.
    workers := make([]<-chan int, workerCount)
    for i := range workers {
        workers[i] = getFollowerCount(in)
    }

    for d := range merge(workers...) {
        fmt.Println(d)
    }

    log.Println("runtime", time.Since(start))
}

回答

1

欢迎来到 Go 的世界,祝学习愉快。

你做得很好。你的程序可以从很多方面改进(例如使用 json 解码器、减少 channel 的数量等)。下面是其中一种做法。执行时间在 352-446 毫秒之间(由于代码涉及网络调用,这个数字仅供参考,会随服务器响应时间而变化)。

你更新后的代码:

package main 

import (
    "encoding/json" 
    "fmt" 
    "log" 
    "net/http" 
    "sync" 
    "time" 
) 

// user is the top-level object decoded from the JSON response body in
// getFollowerCount; only the "user" key is read.
type user struct {
    User userData `json:"user"`
}

// userData holds the part of the "user" object this program reads: the
// "followed_by" entry.
type userData struct {
    Followers count `json:"followed_by"`
}

// count wraps the integer stored under the "count" key.
type count struct {
    Count int `json:"count"`
}

// getFollowerCount requests the profile JSON for username and sends the
// decoded follower count on result. If the request or the decoding fails the
// error is logged and nothing is sent. wg.Done is called on every path.
func getFollowerCount(username string, result chan<- int, wg *sync.WaitGroup) {
    defer wg.Done()

    resp, err := http.Get("https://www.instagram.com/" + username + "/?__a=1")
    if err != nil {
        log.Println(err)
        return
    }
    defer resp.Body.Close()

    profile := user{}
    decoder := json.NewDecoder(resp.Body)
    if err = decoder.Decode(&profile); err != nil {
        log.Println(err)
        return
    }

    result <- profile.User.Followers.Count
}

// execute launches one getFollowerCount goroutine per username, waits for
// all of them to finish and then sends the sentinel value -1 on result to
// tell the receiver that no more counts will follow.
func execute(users []string, result chan<- int) {
    var pending sync.WaitGroup
    pending.Add(len(users))

    for i := range users {
        go getFollowerCount(users[i], result, &pending)
    }

    pending.Wait()
    result <- -1
}

// main starts execute in the background for a fixed list of usernames,
// prints each count received on the result channel until the -1 sentinel
// arrives (or the channel is closed), then prints the elapsed time.
func main() {
    begin := time.Now()
    fmt.Println("STARTING UP")
    usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}

    result := make(chan int)
    go execute(usrs, result)

    for {
        v, open := <-result
        if !open || v == -1 {
            break
        }
        fmt.Println(v)
    }

    fmt.Println("runtime:", time.Since(begin))
}
2

我同意 jeevatkm 的看法:实现并改进你这个任务的方法有很多。几点说明:

  1. 将实际完成工作的函数(即从远程服务获取结果)与负责协调所有工作的函数分开。
  2. 把 error 传播给调用者是一种好的做法,而不是在被调用的函数内部消化(处理)它。
  3. 由于任务是并行执行的,结果可能以不确定的顺序返回。因此,除粉丝数外,结果还应包含其他相关信息。

下面的实现可以作为一种备选方案:

package main 

import (
    "encoding/json" 
    "errors" 
    "fmt" 
    "net/http" 
    "sync" 
    "time" 
) 

// user is the top-level object decoded from the JSON response body in
// getFollowerCountFor; only the "user" key is read.
type user struct {
    User userData `json:"user"`
}

// userData holds the part of the "user" object this program reads: the
// "followed_by" entry.
type userData struct {
    Followers count `json:"followed_by"`
}

// count wraps the integer stored under the "count" key.
type count struct {
    Count int `json:"count"`
}

// follower bundles a username with its follower count and the error (if any)
// from fetching it, so that results delivered in arbitrary order can still be
// attributed to a user. See (3) above.
type follower struct {
    Username string
    Count int // set to -1 by getFollowersAsync when Error is non-nil
    Error error
}

// GetFollowerCountFunc is a function for fetching the follower count of a
// specific user, given the username. Both getFollowerCountFor and
// mockGetFollowerCountFor have this signature, so either can be passed to
// getFollowersAsync.
type GetFollowerCountFunc func(string) (int, error)

// mockGetFollowerCountFor is a network-free stand-in for getFollowerCountFor
// used for testing: usernames of nine or more bytes yield a count of 10,
// shorter ones yield -1 and an error.
func mockGetFollowerCountFor(userName string) (int, error) {
    const minLen = 9
    if len(userName) >= minLen {
        return 10, nil
    }
    return -1, errors.New("mocking error in get follower count")
}

// getFollowerCountFor fetches the profile JSON for userName from the remote
// service and returns the decoded follower count. On a request or decoding
// failure it returns -1 together with the error. See (1) above.
func getFollowerCountFor(userName string) (int, error) {
    resp, err := http.Get("https://www.instagram.com/" + userName + "/?__a=1")
    if err != nil {
        return -1, err
    }
    defer resp.Body.Close()

    profile := new(user)
    if err = json.NewDecoder(resp.Body).Decode(profile); err != nil {
        return -1, err
    }
    return profile.User.Followers.Count, nil
}

// getFollowersAsync coordinates the lookups: it calls fn once per username,
// each call in its own goroutine, and delivers one follower value per user on
// the returned channel. When fn fails the value carries Count -1 and the
// error. The channel is closed after all lookups have reported, so callers
// can simply range over it. See (1), (2) above.
func getFollowersAsync(users []string, fn GetFollowerCountFunc) <-chan follower {
    // The buffer holds one slot per user, so no worker ever blocks on its
    // send. It does not limit how many workers run: one goroutine is
    // started per user regardless of the buffer size.
    followers := make(chan follower, len(users))

    // Distribute the work in the background; this function returns at once.
    go func() {
        // Closing lets the receiver's `for ... range` loop end gracefully.
        defer close(followers)

        var pending sync.WaitGroup
        for _, name := range users {
            pending.Add(1)
            go func(uid string) {
                defer pending.Done()

                res := follower{Username: uid, Count: -1}
                if n, err := fn(uid); err != nil {
                    res.Error = err
                } else {
                    res.Count = n
                }
                followers <- res
            }(name)
        }

        // Wait until every worker has reported.
        pending.Wait()
    }()

    return followers
}

// main looks up the follower counts of a fixed list of usernames
// concurrently via getFollowersAsync, prints one line per user (either the
// count or the error for that user) in the order results arrive, and then
// prints the elapsed time.
func main() {
    start := time.Now()
    fmt.Println("STARTING UP")
    usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}

    results := getFollowersAsync(usrs, getFollowerCountFor)
    //For TESTING:
    //results := getFollowersAsync(usrs, mockGetFollowerCountFor)
    for r := range results {
        if r.Error != nil {
            // Printf does not append a newline; without the explicit "\n"
            // an error report runs into the next line of output.
            fmt.Printf("Error for user '%s' => %v\n", r.Username, r.Error)
            continue
        }
        fmt.Printf("%s: %d\n", r.Username, r.Count)
    }

    elapsed := time.Since(start)
    fmt.Println("runtime", elapsed)
}
相关问题