# Script to attempt to clean up our owncloud database (b/32) after The Postgres
2# Fuckup (b/30).
3#
4# Think of it as a one-shot fsck, documented in the form of the code that q3k@
5# used to recover from this kerfuffle.
6#
7# SECURITY: It's full of manual SQL query crafting without parametrization.
8# Don't attempt to use it for anything else other than this one-shot usecase.
9#
10# You will need to tunnel to the postgreses running on Boston:
11# $ ssh \
12# -L15432:127.0.0.1:5432 \
13# -L15433:127.0.0.1:5433 \
14# hackerspace.pl
15
16from datetime import datetime
17import os
18
19import psycopg2
20
21
# Incident window, as UNIX timestamps.
incident_start = 1611529200  # when pg12 started to run
incident_end = 1611788400    # when we rolled back to pg9


# Get it from boston, /var/www/owncloud/config/config.php.
# NOTE: default to "" -- a bare .get() returns None when the variable is
# unset, and None.strip() would raise AttributeError before the intended,
# self-explanatory Exception below ever fires.
OWNCLOUD_PASSWORD = os.environ.get("OWNCLOUD_PASSWORD", "").strip()
if not OWNCLOUD_PASSWORD:
    raise Exception("OWNCLOUD_PASSWORD must be set to owncloud postgres password")


# conn9 is the rolled-back PG9 database, conn12 the incident-era PG12 one,
# both reached via the SSH tunnels described at the top of this file.
conn9 = psycopg2.connect(host="localhost", port=15432, user="owncloud", password=OWNCLOUD_PASSWORD, dbname="owncloud")
conn12 = psycopg2.connect(host="localhost", port=15433, user="owncloud", password=OWNCLOUD_PASSWORD, dbname="owncloud")
34
35
def idset(conn, table, keyname="id"):
    """Return a set of IDs from a given table, one per row.

    Args:
        conn: open psycopg2 connection.
        table: table name without the "oc_" prefix; interpolated unescaped
            into the query -- see the SECURITY note at the top of this file.
        keyname: column to collect, defaults to the "id" primary key.

    Returns:
        Set of the column's values, one per row (duplicates collapse).
    """
    cur = conn.cursor()
    try:
        cur.execute(f"SELECT {keyname} FROM oc_{table}")
        res = cur.fetchall()
    finally:
        # Close the cursor even if the query fails; the original leaked it
        # on error.
        cur.close()
    # Each fetched row is a 1-tuple; unwrap straight into a set.
    return {r[0] for r in res}
43
44
def valset(conn, table, keys):
    """Return a set of concatenated values for the given keys in a table, one per row."""
    columns = ", ".join(keys)
    cursor = conn.cursor()
    cursor.execute(f"SELECT {columns} FROM oc_{table}")
    rows = cursor.fetchall()
    cursor.close()
    # Flatten each row into a single ';;;'-joined string so whole rows can be
    # compared/diffed as set elements.
    return {';;;'.join(str(col) for col in row) for row in rows}
54
55
# Check accounts difference.
#
# RESULT: Thankfully, no accounts have been accidentally rolled back.
uids12 = idset(conn12, "accounts", keyname="uid")
uids9 = idset(conn9, "accounts", keyname="uid")
missing_accounts = uids12 - uids9
print("Accounts missing in 9:", missing_accounts)
assert missing_accounts == set()
63
64
def account_by_uid(conn, uid):
    """Return SSO UID for a given Owncloud UID."""
    cursor = conn.cursor()
    cursor.execute(f"SELECT ldap_dn FROM oc_ldap_user_mapping WHERE owncloud_name = '{uid}'")
    (dn,) = cursor.fetchone()
    cursor.close()
    # The DN looks like 'uid=<sso-uid>,ou=...'; the SSO UID is the value of
    # the first RDN.
    rdn = dn.split(',')[0]
    assert rdn.startswith('uid=')
    return rdn[len('uid='):]
74
75
def storage_owner_by_id(conn, id_):
    """Return SSO UID for a given storage numerical ID."""
    cursor = conn.cursor()
    cursor.execute(f"SELECT id FROM oc_storages WHERE numeric_id = '{id_}'")
    (storage_name,) = cursor.fetchone()
    cursor.close()

    # The shared S3-backed object store is not owned by any single user.
    if storage_name == 'object::store:amazon::nextcloud':
        return "S3"
    prefix = 'object::user:'
    assert storage_name.startswith(prefix)
    owner = storage_name[len(prefix):]
    assert len(owner) > 0
    # 'gallery' is an internal pseudo-user, not a real account.
    if owner == "gallery":
        return "GALLERY"
    return account_by_uid(conn, owner)
90
91
# Check shares table. This table contains the intent of sharing some file with someone else.
#
# RESULT: we only have things that have been removed after rollback to PG9,
# nothing was created in PG12 and lost.
shareids12 = idset(conn12, "share")
shareids9 = idset(conn9, "share")
print("Shares missing in 9:", len(shareids12 - shareids9))
cur12 = conn12.cursor()
for share_id in list(shareids12 - shareids9):
    cur12.execute(f"SELECT uid_owner, file_target, stime, share_with FROM oc_share WHERE id = {share_id}")
    uid_owner, file_target, stime, share_with = cur12.fetchone()
    account = account_by_uid(conn12, uid_owner)
    stime_human = datetime.utcfromtimestamp(stime).strftime('%Y-%m-%d %H:%M:%S')
    print(f"Missing share {share_id} {file_target} owned by {account}..")
    # Shares created/deleted outside the incident window are unrelated noise.
    if not (incident_start <= stime <= incident_end):
        print(f" Skipping, created at {stime_human}")
        continue
    raise Exception("Unhandled.")
cur12.close()
111
112
# Check mounts table. This contains root file storages for each user, but also
# incoming shares 'mounted' into a user's account.
# From what I can tell, storage_id/root_id are the source path that's being
# mounted (root_id being the fileid inside an oc_filecache, and storage_id
# being the storage in which that file is kept), while user_id/mount_point are
# the mount destination (ie. path into which this is mounted for a user's
# view).
#
# RESULT: we only have share-mounts missing for a handful of users. We choose
# to ignore it, as we assume next time these users log in they will get the
# mounts again.
# TODO(q3k): verify this
mounts12 = valset(conn12, "mounts", ["storage_id", "root_id", "user_id", "mount_point"])
mounts9 = valset(conn9, "mounts", ["storage_id", "root_id", "user_id", "mount_point"])
print("Mounts missing in 9:", len(mounts12 - mounts9))
# Mounts that appear normally whenever you log into owncloud, as they are the result of shares:
mount_names_ok = set(["2020-03-26_covid_templar", "camera", "Public Shaming", "przylbice.md", "Test.txt", "covid"])
# Mounts that used to be from a share that existed, but has been since deleted in PG9.
mount_names_ok |= set(["Covid-instrukcje", "Chaos_modele_covid", "Covid_proces_presspack"])
mounts_sorted = []
for m in list(mounts12 - mounts9):
    # Each valset() element is the four columns joined with ';;;' -- split them back out.
    storage_id, root_id, user_id, mount_point = m.split(';;;')
    mounts_sorted.append((storage_id, root_id, user_id, mount_point))
# Sort by user_id so the report below groups mounts per user.
mounts_sorted = sorted(mounts_sorted, key=lambda el: el[2])
for storage_id, root_id, user_id, mount_point in mounts_sorted:
    # Mount points are always rooted at /<user_id>/; strip that prefix so only
    # the in-account path remains.
    assert mount_point.startswith("/" + user_id + "/")
    mount_point = mount_point[len(user_id)+1:]
    account = account_by_uid(conn12, user_id)
    print(f"Missing mount {mount_point}, storage ID {storage_id}, owned by {account}..")
    storage_owner = storage_owner_by_id(conn12, storage_id)
    print(f" Storage owner: {storage_owner}")

    parts = mount_point.split('/')
    # Expected shape: '/files/<known-ok name>/' (note trailing slash, hence the
    # empty last part). Anything else is unexplained and must be investigated.
    if len(parts) == 4 and parts[0] == '' and parts[1] == 'files' and parts[2] in mount_names_ok and parts[3] == '':
        print(" Skipping, known okay")
        continue
    raise Exception("Unhandled")
150