package main

import (
	"context"
	"encoding/xml"
	"fmt"
	"html/template"
	"net/http"
	"sort"
	"time"

	"github.com/golang/glog"
)

// This implements Atom feed parsing. It was written against the feeds listed
// below rather than the Atom spec (RFC 4287), so valid but unusual feeds may
// well trip it up.

var (
	// feedsURLs is a map from an atom feed name to its URL. All the following
	// feeds will be combined and rendered on the main page of the website.
	feedsURLs = map[string]string{
		"blog": "https://blog.hackerspace.pl/feed/atom/",
	}
)

// atomFeed is a retrieved atom feed.
type atomFeed struct {
	XMLName xml.Name     `xml:"feed"`
	Entries []*atomEntry `xml:"entry"`
}

// atomEntry is an entry (eg. blog post) from an atom feed. It contains fields
// directly from the XML, plus some additional parsed types and metadata.
type atomEntry struct {
	XMLName xml.Name `xml:"entry"`
	Author  string   `xml:"author>name"`
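	// Title and Summary are template.HTML, so markup from the feed is
	// rendered unescaped. This trusts the upstream feeds in feedsURLs to
	// serve sane HTML.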
	Title        template.HTML `xml:"title"`
	Summary      template.HTML `xml:"summary"`
	UpdatedRaw   string        `xml:"updated"`
	PublishedRaw string        `xml:"published"`
	Link         struct {
		Href string `xml:"href,attr"`
	} `xml:"link"`

	// Updated is the updated time parsed from UpdatedRaw.
	Updated time.Time
	// UpdatedHuman is a human-friendly representation of Updated for web rendering.
	UpdatedHuman string
	// Published is the published time parsed from PublishedRaw.
	Published time.Time
	// Source is the name of the feed that this entry was retrieved from. Only
	// set after combining multiple feeds together (ie. when returned from
	// getFeeds).
	Source string
}
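
// For reference, an abridged example of an entry this struct is expected to
// decode (element names per the Atom format, values purely illustrative; note
// that only a single <link> is captured):
//
//	<entry>
//	  <title>Some post</title>
//	  <summary type="html">A summary.</summary>
//	  <author><name>Some Author</name></author>
//	  <link href="https://example.com/some-post"/>
//	  <published>2006-01-02T15:04:05Z</published>
//	  <updated>2006-01-02T15:04:05Z</updated>
//	</entry>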

// getAtomFeed retrieves a single Atom feed from the given URL.
func getAtomFeed(ctx context.Context, url string) (*atomFeed, error) {
	r, err := http.NewRequestWithContext(ctx, "GET", url, nil)
	if err != nil {
		return nil, fmt.Errorf("NewRequest(%q): %w", url, err)
	}
	res, err := http.DefaultClient.Do(r)
	if err != nil {
		return nil, fmt.Errorf("Do(%q): %w", url, err)
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("Do(%q): unexpected status: %s", url, res.Status)
	}

	var feed atomFeed
	d := xml.NewDecoder(res.Body)
	if err := d.Decode(&feed); err != nil {
		return nil, fmt.Errorf("Decode: %w", err)
	}

	for i, e := range feed.Entries {
		updated, err := time.Parse(time.RFC3339, e.UpdatedRaw)
		if err != nil {
			return nil, fmt.Errorf("entry %d: cannot parse updated date %q: %w", i, e.UpdatedRaw, err)
		}
		published, err := time.Parse(time.RFC3339, e.PublishedRaw)
		if err != nil {
			return nil, fmt.Errorf("entry %d: cannot parse published date %q: %w", i, e.PublishedRaw, err)
		}
		e.Updated = updated
		e.Published = published
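		// Render the updated time as DD-MM-YYYY for display.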
		e.UpdatedHuman = e.Updated.Format("02-01-2006")
		if e.Author == "" {
			e.Author = "Anonymous"
		}
	}

	return &feed, nil
}

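// feedWorker and getFeeds below assume that the service type, defined
// elsewhere in this package, carries at least the following fields:
//
//	feeds   map[string]*atomFeed
//	feedsMu sync.RWMutex
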
// feedWorker runs a worker which retrieves all atom feeds every minute and
// updates the service's feeds map with the retrieved data. When a feed fails
// to fetch, whatever is already cached in the map for it continues to be
// available, and the error is logged.
func (s *service) feedWorker(ctx context.Context) {
	okay := false
	get := func() {
		feeds := make(map[string]*atomFeed)

		prev := okay
		okay = true
		for name, url := range feedsURLs {
			feed, err := getAtomFeed(ctx, url)
			if err != nil {
				glog.Errorf("Getting feed %q (%s) failed: %v", name, url, err)
				okay = false
				// Keep whatever was previously cached for this feed, so that
				// a transient fetch error does not drop it from the page.
				s.feedsMu.RLock()
				if cached, ok := s.feeds[name]; ok {
					feeds[name] = cached
				}
				s.feedsMu.RUnlock()
				continue
			}
			feeds[name] = feed
		}

		// Log whenever the first fetch succeeds, or whenever the fetch
		// succeeds again (avoiding polluting logs with success messages).
		if !prev && okay {
			glog.Infof("Feeds okay.")
		}

		// Update cached feeds.
		s.feedsMu.Lock()
		s.feeds = feeds
		s.feedsMu.Unlock()
	}
	// Perform initial fetch.
	get()

	// ... and update every minute.
	t := time.NewTicker(time.Minute)
	defer t.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-t.C:
			get()
		}
	}
}
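
// A typical caller (hypothetical, not part of this file) would start the
// worker in its own goroutine, since feedWorker blocks until ctx is done:
//
//	go s.feedWorker(ctx)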

// getFeeds retrieves the currently cached feeds and combines them into a
// single reverse-chronological timeline, annotating each entry's Source field
// with the name of the feed from which it was retrieved.
func (s *service) getFeeds() []*atomEntry {
	s.feedsMu.RLock()
	feeds := s.feeds
	s.feedsMu.RUnlock()

	var res []*atomEntry
	for n, feed := range feeds {
		for _, entry := range feed.Entries {
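			// Copy the entry so that setting Source does not mutate the
			// cached feed shared with other readers.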
			e := *entry
			e.Source = n
			res = append(res, &e)
		}
	}
	sort.Slice(res, func(i, j int) bool {
		return res[i].Published.After(res[j].Published)
	})
	return res
}
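
// Downstream (hypothetical sketch; tmpl and w are not part of this file), an
// HTTP handler can pass the combined timeline straight into a template, with
// Title and Summary rendering unescaped because they are template.HTML:
//
//	entries := s.getFeeds()
//	if err := tmpl.ExecuteTemplate(w, "index", entries); err != nil {
//		glog.Errorf("ExecuteTemplate: %v", err)
//	}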