package main

import (
	"context"
	"encoding/xml"
	"fmt"
	"html/template"
	"net/http"
	"sort"
	"time"

	"github.com/golang/glog"
)

// This implements 'Atom' feed parsing. Honestly, this was written without
// looking at any spec. If it ever breaks, you know why.

var (
	// feedsURLs is a map from an atom feed name to its URL. All feeds listed
	// here will be combined and rendered on the main page of the website.
	feedsURLs = map[string]string{
		"blog": "https://blog.hackerspace.pl/feed/atom/",
	}
)

// atomFeed is a retrieved atom feed.
type atomFeed struct {
	XMLName xml.Name     `xml:"feed"`
	Entries []*atomEntry `xml:"entry"`
}

// atomEntry is an entry (e.g. a blog post) from an atom feed. It contains
// fields unmarshaled directly from the XML, plus some additional parsed types
// and metadata.
type atomEntry struct {
	XMLName      xml.Name      `xml:"entry"`
	Author       string        `xml:"author>name"`
	Title        template.HTML `xml:"title"`
	Summary      template.HTML `xml:"summary"`
	UpdatedRaw   string        `xml:"updated"`
	PublishedRaw string        `xml:"published"`
	Link         struct {
		Href string `xml:"href,attr"`
	} `xml:"link"`

	// Updated is the updated time parsed from UpdatedRaw.
	Updated time.Time
	// UpdatedHuman is a human-friendly representation of Updated for web
	// rendering.
	UpdatedHuman string
	// Published is the published time parsed from PublishedRaw.
	Published time.Time
	// Source is the name of the feed that this entry was retrieved from. Only
	// set after combining multiple feeds together (i.e. when returned from
	// getFeeds).
	Source string
}
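
// For reference, the subset of an Atom document that the types above consume
// looks roughly like this (a hand-written sketch, not a capture of the real
// feed):
//
//	<feed xmlns="http://www.w3.org/2005/Atom">
//	  <entry>
//	    <title>Some post</title>
//	    <author><name>Example Author</name></author>
//	    <link href="https://blog.hackerspace.pl/some-post/"/>
//	    <published>2021-05-30T23:15:20+00:00</published>
//	    <updated>2021-05-30T23:15:20+00:00</updated>
//	    <summary>Lorem ipsum dolor sit amet.</summary>
//	  </entry>
//	</feed>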

// getAtomFeed retrieves a single Atom feed from the given URL.
func getAtomFeed(ctx context.Context, url string) (*atomFeed, error) {
	r, err := http.NewRequestWithContext(ctx, "GET", url, nil)
	if err != nil {
		return nil, fmt.Errorf("NewRequest(%q): %w", url, err)
	}
	res, err := http.DefaultClient.Do(r)
	if err != nil {
		return nil, fmt.Errorf("Do(%q): %w", url, err)
	}
	defer res.Body.Close()
	// Refuse to parse anything that isn't a 200 response, as error pages are
	// unlikely to contain a valid feed.
	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("Do(%q): unexpected status: %s", url, res.Status)
	}

	var feed atomFeed
	d := xml.NewDecoder(res.Body)
	if err := d.Decode(&feed); err != nil {
		return nil, fmt.Errorf("Decode: %w", err)
	}

	for i, e := range feed.Entries {
		// Atom timestamps are RFC3339, e.g. 2021-05-30T23:15:20+00:00.
		updated, err := time.Parse(time.RFC3339, e.UpdatedRaw)
		if err != nil {
			return nil, fmt.Errorf("entry %d: cannot parse updated date %q: %w", i, e.UpdatedRaw, err)
		}
		published, err := time.Parse(time.RFC3339, e.PublishedRaw)
		if err != nil {
			return nil, fmt.Errorf("entry %d: cannot parse published date %q: %w", i, e.PublishedRaw, err)
		}
		e.Updated = updated
		e.Published = published
		// Render dates as DD-MM-YYYY, e.g. 30-05-2021.
		e.UpdatedHuman = e.Updated.Format("02-01-2006")
		if e.Author == "" {
			e.Author = "Anonymous"
		}
	}

	return &feed, nil
}

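// A quick way to exercise getAtomFeed by hand (a sketch; nothing in this file
// actually calls it this way):
//
//	feed, err := getAtomFeed(context.Background(), feedsURLs["blog"])
//	if err != nil {
//		glog.Exitf("getAtomFeed: %v", err)
//	}
//	for _, e := range feed.Entries {
//		fmt.Printf("%s %s\n", e.UpdatedHuman, e.Title)
//	}
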
// feedWorker runs a worker which retrieves all atom feeds every minute and
// updates the service's feeds map with the retrieved data. On error, the
// feeds are not updated (whatever is already cached in the map will continue
// to be available) and the error is logged.
func (s *service) feedWorker(ctx context.Context) {
	okay := false
	get := func() {
		feeds := make(map[string]*atomFeed)

		prev := okay
		okay = true
		for name, url := range feedsURLs {
			feed, err := getAtomFeed(ctx, url)
			if err != nil {
				glog.Errorf("Getting feed %q (%s) failed: %v", name, url, err)
				okay = false
				continue
			}
			feeds[name] = feed
		}

		// Log when the first fetch succeeds, and whenever fetches start
		// succeeding again after a failure (this avoids polluting the logs
		// with repeated success messages).
		if !prev && okay {
			glog.Infof("Feeds okay.")
		}

		// Update cached feeds.
		s.feedsMu.Lock()
		s.feeds = feeds
		s.feedsMu.Unlock()
	}
	// Perform initial fetch.
	get()

	// ... and update every minute.
	t := time.NewTicker(time.Minute)
	defer t.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-t.C:
			get()
		}
	}
}

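// Note that feedWorker blocks until ctx is canceled, so it is expected to be
// started in the background at service startup, along the lines of (a sketch;
// the actual startup code lives elsewhere):
//
//	go s.feedWorker(ctx)
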
// getFeeds retrieves the currently cached feeds and combines them into a
// single reverse-chronological timeline, annotating each entry's Source field
// with the name of the feed it was retrieved from.
func (s *service) getFeeds() []*atomEntry {
	s.feedsMu.RLock()
	feeds := s.feeds
	s.feedsMu.RUnlock()

	var res []*atomEntry
	for n, feed := range feeds {
		for _, entry := range feed.Entries {
			// Copy the entry so that setting Source does not mutate the
			// shared cache.
			e := *entry
			e.Source = n
			res = append(res, &e)
		}
	}
	// Sort newest first.
	sort.Slice(res, func(i, j int) bool {
		return res[j].Published.Before(res[i].Published)
	})
	return res
}
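
// Title and Summary are template.HTML, so html/template injects them without
// escaping; a template consuming getFeeds output might contain something like
// this (hypothetical; the real templates are not part of this file):
//
//	{{range .Feeds}}<a href="{{.Link.Href}}">{{.Title}}</a> by {{.Author}}, {{.UpdatedHuman}}{{end}}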